mdconfig: remove the "cluster" option.
[freebsd.git] / sys / fs / nfsclient / nfs_clrpcops.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35
36 #include <sys/cdefs.h>
37 /*
38  * Rpc op calls, generally called from the vnode op calls or through the
39  * buffer cache, for NFS v2, 3 and 4.
40  * These do not normally make any changes to vnode arguments or use
41  * structures that might change between the VFS variants. The returned
42  * arguments are all at the end, after the NFSPROC_T *p one.
43  */
44
45 #include "opt_inet6.h"
46
47 #include <fs/nfs/nfsport.h>
48 #include <fs/nfsclient/nfs.h>
49 #include <sys/extattr.h>
50 #include <sys/sysctl.h>
51 #include <sys/taskqueue.h>
52
53 SYSCTL_DECL(_vfs_nfs);
54
55 static int      nfsignore_eexist = 0;
56 SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
57     &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
58
59 static int      nfscl_dssameconn = 0;
60 SYSCTL_INT(_vfs_nfs, OID_AUTO, dssameconn, CTLFLAG_RW,
61     &nfscl_dssameconn, 0, "Use same TCP connection to multiple DSs");
62
63 static uint64_t nfs_maxcopyrange = SSIZE_MAX;
64 SYSCTL_U64(_vfs_nfs, OID_AUTO, maxcopyrange, CTLFLAG_RW,
65     &nfs_maxcopyrange, 0, "Max size of a Copy so RPC times reasonable");
66
67 /*
68  * Global variables
69  */
70 extern struct nfsstatsv1 nfsstatsv1;
71 extern int nfs_numnfscbd;
72 extern struct timeval nfsboottime;
73 extern u_int32_t newnfs_false, newnfs_true;
74 extern nfstype nfsv34_type[9];
75 extern int nfsrv_useacl;
76 extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
77 extern int nfscl_debuglevel;
78 extern int nfs_pnfsiothreads;
79 extern u_long sb_max_adj;
80 NFSCLSTATEMUTEX;
81 int nfstest_outofseq = 0;
82 int nfscl_assumeposixlocks = 1;
83 int nfscl_enablecallb = 0;
84 short nfsv4_cbport = NFSV4_CBPORT;
85 int nfstest_openallsetattr = 0;
86
87 #define DIRHDSIZ        offsetof(struct dirent, d_name)
88
/*
 * Result codes returned by nfscl_getsameserver().
 */
enum nfsclds_state {
	/* Use this session for the DS. */
	NFSDSP_USETHISSESSION = 0,
	/* Use the nfsclds_sequence field of this dsp for a new session. */
	NFSDSP_SEQTHISSESSION = 1,
	/* No matching server was found. */
	NFSDSP_NOTFOUND = 2,
};
101
102 /*
103  * Do a write RPC on a DS data file, using this structure for the arguments,
104  * so that this function can be executed by a separate kernel process.
105  */
106 struct nfsclwritedsdorpc {
107         int                     done;
108         int                     inprog;
109         struct task             tsk;
110         struct vnode            *vp;
111         int                     iomode;
112         int                     must_commit;
113         nfsv4stateid_t          *stateidp;
114         struct nfsclds          *dsp;
115         uint64_t                off;
116         int                     len;
117 #ifdef notyet
118         int                     advise;
119 #endif
120         struct nfsfh            *fhp;
121         struct mbuf             *m;
122         int                     vers;
123         int                     minorvers;
124         struct ucred            *cred;
125         NFSPROC_T               *p;
126         int                     err;
127 };
128
129 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
130     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *);
131 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
132     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *);
133 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
134     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
135     int);
136 static int nfsrpc_deallocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
137     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *);
138 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
139     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
140     struct nfsvattr *, struct nfsfh **, int *, int *);
141 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
142     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
143     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
144     int *, int *);
145 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
146     struct nfscllockowner *, u_int64_t, u_int64_t,
147     u_int32_t, struct ucred *, NFSPROC_T *, int);
148 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
149     struct acl *, nfsv4stateid_t *);
150 static int nfsrpc_layouterror(struct nfsmount *, uint8_t *, int, uint64_t,
151     uint64_t, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, uint32_t,
152     uint32_t, char *);
153 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
154     uint32_t, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
155     struct ucred *, NFSPROC_T *);
156 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
157     struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **,
158     NFSPROC_T *);
159 static void nfscl_initsessionslots(struct nfsclsession *);
160 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
161     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
162     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
163     NFSPROC_T *);
164 static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
165     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
166     struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
167     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
168 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
169     struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
170     struct ucred *, NFSPROC_T *);
171 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
172     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
173     struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
174 static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
175     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
176     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
177 static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
178     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
179     struct ucred *, NFSPROC_T *);
180 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
181     struct nfsclds *, struct nfsclds **, uint32_t *);
182 static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
183     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
184     NFSPROC_T *);
185 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
186     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
187 #ifdef notyet
188 static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
189     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
190     NFSPROC_T *);
191 static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
192     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
193 #endif
194 static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
195     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *);
196 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
197     uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
198 static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
199     NFSPROC_T *);
200 static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *,
201     nfsv4stateid_t *, int *, struct nfsclflayouthead *);
202 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
203     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
204     struct nfscldeleg **, struct ucred *, NFSPROC_T *);
205 static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
206     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
207     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
208     struct nfsfh **, int *, int *, int *);
209 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
210     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
211     struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
212     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
213 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
214     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
215     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
216     struct nfsfh **, int *, int *, int *, nfsv4stateid_t *,
217     int, int, int, int *, struct nfsclflayouthead *, int *);
218 static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
219     uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
220     struct nfsclflayouthead *, struct ucred *, NFSPROC_T *);
221 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
222     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
223     struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
224 static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *,
225     nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *,
226     struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *);
227 static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *,
228     int, struct nfsvattr *, int *, struct ucred *);
229 static struct mbuf *nfsm_split(struct mbuf *, uint64_t);
230 static void nfscl_statfs(struct vnode *, struct ucred *, NFSPROC_T *);
231
232 int nfs_pnfsio(task_fn_t *, void *);
233
234 /*
235  * nfs null call from vfs.
236  */
237 int
238 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
239 {
240         int error;
241         struct nfsrv_descript nfsd, *nd = &nfsd;
242
243         NFSCL_REQSTART(nd, NFSPROC_NULL, vp, NULL);
244         error = nfscl_request(nd, vp, p, cred);
245         if (nd->nd_repstat && !error)
246                 error = nd->nd_repstat;
247         m_freem(nd->nd_mrep);
248         return (error);
249 }
250
251 /*
252  * nfs access rpc op.
253  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
254  * modes are changed on the server, accesses might still fail later.
255  */
256 int
257 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
258     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
259 {
260         int error;
261         u_int32_t mode, rmode;
262
263         if (acmode & VREAD)
264                 mode = NFSACCESS_READ;
265         else
266                 mode = 0;
267         if (vp->v_type == VDIR) {
268                 if (acmode & VWRITE)
269                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
270                                  NFSACCESS_DELETE);
271                 if (acmode & VEXEC)
272                         mode |= NFSACCESS_LOOKUP;
273         } else {
274                 if (acmode & VWRITE)
275                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
276                 if (acmode & VEXEC)
277                         mode |= NFSACCESS_EXECUTE;
278         }
279
280         /*
281          * Now, just call nfsrpc_accessrpc() to do the actual RPC.
282          */
283         error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode);
284
285         /*
286          * The NFS V3 spec does not clarify whether or not
287          * the returned access bits can be a superset of
288          * the ones requested, so...
289          */
290         if (!error && (rmode & mode) != mode)
291                 error = EACCES;
292         return (error);
293 }
294
295 /*
296  * The actual rpc, separated out for Darwin.
297  */
298 int
299 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
300     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep)
301 {
302         u_int32_t *tl;
303         u_int32_t supported, rmode;
304         int error;
305         struct nfsrv_descript nfsd, *nd = &nfsd;
306         nfsattrbit_t attrbits;
307         struct nfsmount *nmp;
308         struct nfsnode *np;
309
310         *attrflagp = 0;
311         supported = mode;
312         nmp = VFSTONFS(vp->v_mount);
313         np = VTONFS(vp);
314         if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
315             nmp->nm_fhsize == 0) {
316                 /* Attempt to get the actual root file handle. */
317                 error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, p);
318                 if (error != 0)
319                         return (EACCES);
320                 if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
321                         nfscl_statfs(vp, cred, p);
322         }
323         NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp, cred);
324         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
325         *tl = txdr_unsigned(mode);
326         if (nd->nd_flag & ND_NFSV4) {
327                 /*
328                  * And do a Getattr op.
329                  */
330                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
331                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
332                 NFSGETATTR_ATTRBIT(&attrbits);
333                 (void) nfsrv_putattrbit(nd, &attrbits);
334         }
335         error = nfscl_request(nd, vp, p, cred);
336         if (error)
337                 return (error);
338         if (nd->nd_flag & ND_NFSV3) {
339                 error = nfscl_postop_attr(nd, nap, attrflagp);
340                 if (error)
341                         goto nfsmout;
342         }
343         if (!nd->nd_repstat) {
344                 if (nd->nd_flag & ND_NFSV4) {
345                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
346                         supported = fxdr_unsigned(u_int32_t, *tl++);
347                 } else {
348                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
349                 }
350                 rmode = fxdr_unsigned(u_int32_t, *tl);
351                 if (nd->nd_flag & ND_NFSV4)
352                         error = nfscl_postop_attr(nd, nap, attrflagp);
353
354                 /*
355                  * It's not obvious what should be done about
356                  * unsupported access modes. For now, be paranoid
357                  * and clear the unsupported ones.
358                  */
359                 rmode &= supported;
360                 *rmodep = rmode;
361         } else
362                 error = nd->nd_repstat;
363 nfsmout:
364         m_freem(nd->nd_mrep);
365         return (error);
366 }
367
368 /*
369  * nfs open rpc
370  */
371 int
372 nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
373 {
374         struct nfsclopen *op;
375         struct nfscldeleg *dp;
376         struct nfsfh *nfhp;
377         struct nfsnode *np = VTONFS(vp);
378         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
379         u_int32_t mode, clidrev;
380         int ret, newone, error, expireret = 0, retrycnt;
381
382         /*
383          * For NFSv4, Open Ops are only done on Regular Files.
384          */
385         if (vp->v_type != VREG)
386                 return (0);
387         mode = 0;
388         if (amode & FREAD)
389                 mode |= NFSV4OPEN_ACCESSREAD;
390         if (amode & FWRITE)
391                 mode |= NFSV4OPEN_ACCESSWRITE;
392         nfhp = np->n_fhp;
393
394         retrycnt = 0;
395         do {
396             dp = NULL;
397             error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
398                 cred, p, NULL, &op, &newone, &ret, 1, true);
399             if (error) {
400                 return (error);
401             }
402             if (nmp->nm_clp != NULL)
403                 clidrev = nmp->nm_clp->nfsc_clientidrev;
404             else
405                 clidrev = 0;
406             if (ret == NFSCLOPEN_DOOPEN) {
407                 if (np->n_v4 != NULL) {
408                         /*
409                          * For the first attempt, try and get a layout, if
410                          * pNFS is enabled for the mount.
411                          */
412                         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
413                             nfs_numnfscbd == 0 ||
414                             (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
415                                 error = nfsrpc_openrpc(nmp, vp,
416                                     np->n_v4->n4_data,
417                                     np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
418                                     np->n_fhp->nfh_len, mode, op,
419                                     NFS4NODENAME(np->n_v4),
420                                     np->n_v4->n4_namelen,
421                                     &dp, 0, 0x0, cred, p, 0, 0);
422                         else
423                                 error = nfsrpc_getopenlayout(nmp, vp,
424                                     np->n_v4->n4_data,
425                                     np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
426                                     np->n_fhp->nfh_len, mode, op,
427                                     NFS4NODENAME(np->n_v4),
428                                     np->n_v4->n4_namelen, &dp, cred, p);
429                         if (dp != NULL) {
430                                 NFSLOCKNODE(np);
431                                 np->n_flag &= ~NDELEGMOD;
432                                 /*
433                                  * Invalidate the attribute cache, so that
434                                  * attributes that pre-date the issue of a
435                                  * delegation are not cached, since the
436                                  * cached attributes will remain valid while
437                                  * the delegation is held.
438                                  */
439                                 NFSINVALATTRCACHE(np);
440                                 NFSUNLOCKNODE(np);
441                                 (void) nfscl_deleg(nmp->nm_mountp,
442                                     op->nfso_own->nfsow_clp,
443                                     nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
444                         }
445                 } else if (NFSHASNFSV4N(nmp)) {
446                         /*
447                          * For the first attempt, try and get a layout, if
448                          * pNFS is enabled for the mount.
449                          */
450                         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
451                             nfs_numnfscbd == 0 ||
452                             (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
453                                 error = nfsrpc_openrpc(nmp, vp, nfhp->nfh_fh,
454                                     nfhp->nfh_len, nfhp->nfh_fh, nfhp->nfh_len,
455                                     mode, op, NULL, 0, &dp, 0, 0x0, cred, p, 0,
456                                     0);
457                         else
458                                 error = nfsrpc_getopenlayout(nmp, vp,
459                                     nfhp->nfh_fh, nfhp->nfh_len, nfhp->nfh_fh,
460                                     nfhp->nfh_len, mode, op, NULL, 0, &dp,
461                                     cred, p);
462                         if (dp != NULL) {
463                                 NFSLOCKNODE(np);
464                                 np->n_flag &= ~NDELEGMOD;
465                                 /*
466                                  * Invalidate the attribute cache, so that
467                                  * attributes that pre-date the issue of a
468                                  * delegation are not cached, since the
469                                  * cached attributes will remain valid while
470                                  * the delegation is held.
471                                  */
472                                 NFSINVALATTRCACHE(np);
473                                 NFSUNLOCKNODE(np);
474                                 (void) nfscl_deleg(nmp->nm_mountp,
475                                     op->nfso_own->nfsow_clp,
476                                     nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
477                         }
478                 } else {
479                         error = EIO;
480                 }
481                 newnfs_copyincred(cred, &op->nfso_cred);
482             } else if (ret == NFSCLOPEN_SETCRED)
483                 /*
484                  * This is a new local open on a delegation. It needs
485                  * to have credentials so that an open can be done
486                  * against the server during recovery.
487                  */
488                 newnfs_copyincred(cred, &op->nfso_cred);
489
490             /*
491              * nfso_opencnt is the count of how many VOP_OPEN()s have
492              * been done on this Open successfully and a VOP_CLOSE()
493              * is expected for each of these.
494              * If error is non-zero, don't increment it, since the Open
495              * hasn't succeeded yet.
496              */
497             if (!error) {
498                 op->nfso_opencnt++;
499                 if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) {
500                     NFSLOCKNODE(np);
501                     np->n_openstateid = op;
502                     NFSUNLOCKNODE(np);
503                 }
504             }
505             nfscl_openrelease(nmp, op, error, newone);
506             if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
507                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
508                 error == NFSERR_BADSESSION) {
509                 (void) nfs_catnap(PZERO, error, "nfs_open");
510             } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
511                 && clidrev != 0) {
512                 expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
513                 retrycnt++;
514             }
515         } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
516             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
517             error == NFSERR_BADSESSION ||
518             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
519              expireret == 0 && clidrev != 0 && retrycnt < 4));
520         if (error && retrycnt >= 4)
521                 error = EIO;
522         return (error);
523 }
524
525 /*
526  * the actual open rpc
527  */
528 int
529 nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
530     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
531     u_int8_t *name, int namelen, struct nfscldeleg **dpp,
532     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
533     int syscred, int recursed)
534 {
535         u_int32_t *tl;
536         struct nfsrv_descript nfsd, *nd = &nfsd;
537         struct nfscldeleg *dp, *ndp = NULL;
538         struct nfsvattr nfsva;
539         u_int32_t rflags, deleg;
540         nfsattrbit_t attrbits;
541         int error, ret, acesize, limitby;
542         struct nfsclsession *tsep;
543
544         dp = *dpp;
545         *dpp = NULL;
546         nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
547             cred);
548         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
549         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
550         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
551         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
552         tsep = nfsmnt_mdssession(nmp);
553         *tl++ = tsep->nfsess_clientid.lval[0];
554         *tl = tsep->nfsess_clientid.lval[1];
555         (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
556         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
557         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
558         if (reclaim) {
559                 *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
560                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
561                 *tl = txdr_unsigned(delegtype);
562         } else {
563                 if (dp != NULL) {
564                         if (NFSHASNFSV4N(nmp))
565                                 *tl = txdr_unsigned(
566                                     NFSV4OPEN_CLAIMDELEGATECURFH);
567                         else
568                                 *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
569                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
570                         if (NFSHASNFSV4N(nmp))
571                                 *tl++ = 0;
572                         else
573                                 *tl++ = dp->nfsdl_stateid.seqid;
574                         *tl++ = dp->nfsdl_stateid.other[0];
575                         *tl++ = dp->nfsdl_stateid.other[1];
576                         *tl = dp->nfsdl_stateid.other[2];
577                         if (!NFSHASNFSV4N(nmp))
578                                 (void)nfsm_strtom(nd, name, namelen);
579                 } else if (NFSHASNFSV4N(nmp)) {
580                         *tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
581                 } else {
582                         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
583                         (void)nfsm_strtom(nd, name, namelen);
584                 }
585         }
586         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
587         *tl = txdr_unsigned(NFSV4OP_GETATTR);
588         NFSZERO_ATTRBIT(&attrbits);
589         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
590         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
591         (void) nfsrv_putattrbit(nd, &attrbits);
592         if (syscred)
593                 nd->nd_flag |= ND_USEGSSNAME;
594         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
595             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
596         if (error)
597                 return (error);
598         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
599         if (nd->nd_repstat == 0 || (nd->nd_repstat == NFSERR_DELAY &&
600             reclaim != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0)) {
601                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
602                     6 * NFSX_UNSIGNED);
603                 op->nfso_stateid.seqid = *tl++;
604                 op->nfso_stateid.other[0] = *tl++;
605                 op->nfso_stateid.other[1] = *tl++;
606                 op->nfso_stateid.other[2] = *tl;
607                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
608                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
609                 if (error)
610                         goto nfsmout;
611                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
612                 deleg = fxdr_unsigned(u_int32_t, *tl);
613                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
614                     deleg == NFSV4OPEN_DELEGATEWRITE) {
615                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
616                               NFSCLFLAGS_FIRSTDELEG))
617                                 op->nfso_own->nfsow_clp->nfsc_flags |=
618                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
619                         ndp = malloc(
620                             sizeof (struct nfscldeleg) + newfhlen,
621                             M_NFSCLDELEG, M_WAITOK);
622                         LIST_INIT(&ndp->nfsdl_owner);
623                         LIST_INIT(&ndp->nfsdl_lock);
624                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
625                         ndp->nfsdl_fhlen = newfhlen;
626                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
627                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
628                         nfscl_lockinit(&ndp->nfsdl_rwlock);
629                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
630                             NFSX_UNSIGNED);
631                         ndp->nfsdl_stateid.seqid = *tl++;
632                         ndp->nfsdl_stateid.other[0] = *tl++;
633                         ndp->nfsdl_stateid.other[1] = *tl++;
634                         ndp->nfsdl_stateid.other[2] = *tl++;
635                         ret = fxdr_unsigned(int, *tl);
636                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
637                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
638                                 /*
639                                  * Indicates how much the file can grow.
640                                  */
641                                 NFSM_DISSECT(tl, u_int32_t *,
642                                     3 * NFSX_UNSIGNED);
643                                 limitby = fxdr_unsigned(int, *tl++);
644                                 switch (limitby) {
645                                 case NFSV4OPEN_LIMITSIZE:
646                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
647                                         break;
648                                 case NFSV4OPEN_LIMITBLOCKS:
649                                         ndp->nfsdl_sizelimit =
650                                             fxdr_unsigned(u_int64_t, *tl++);
651                                         ndp->nfsdl_sizelimit *=
652                                             fxdr_unsigned(u_int64_t, *tl);
653                                         break;
654                                 default:
655                                         error = NFSERR_BADXDR;
656                                         goto nfsmout;
657                                 }
658                         } else {
659                                 ndp->nfsdl_flags = NFSCLDL_READ;
660                         }
661                         if (ret)
662                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
663                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
664                             &ret, &acesize, p);
665                         if (error)
666                                 goto nfsmout;
667                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
668                         error = NFSERR_BADXDR;
669                         goto nfsmout;
670                 }
671                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
672                 /* If the 2nd element == NFS_OK, the Getattr succeeded. */
673                 if (*++tl == 0) {
674                         KASSERT(nd->nd_repstat == 0,
675                             ("nfsrpc_openrpc: Getattr repstat"));
676                         error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
677                             NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
678                             NULL, NULL, NULL, p, cred);
679                         if (error)
680                                 goto nfsmout;
681                 }
682                 if (ndp != NULL) {
683                         if (reclaim != 0 && dp != NULL) {
684                                 ndp->nfsdl_change = dp->nfsdl_change;
685                                 ndp->nfsdl_modtime = dp->nfsdl_modtime;
686                                 ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
687                         } else if (nd->nd_repstat == 0) {
688                                 ndp->nfsdl_change = nfsva.na_filerev;
689                                 ndp->nfsdl_modtime = nfsva.na_mtime;
690                                 ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
691                         } else
692                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
693                 }
694                 nd->nd_repstat = 0;
695                 if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
696                     do {
697                         ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
698                             cred, p);
699                         if (ret == NFSERR_DELAY)
700                             (void) nfs_catnap(PZERO, ret, "nfs_open");
701                     } while (ret == NFSERR_DELAY);
702                     error = ret;
703                 }
704                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
705                     nfscl_assumeposixlocks)
706                     op->nfso_posixlock = 1;
707                 else
708                     op->nfso_posixlock = 0;
709
710                 /*
711                  * If the server is handing out delegations, but we didn't
712                  * get one because an OpenConfirm was required, try the
713                  * Open again, to get a delegation. This is a harmless no-op,
714                  * from a server's point of view.
715                  */
716                 if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
717                     (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
718                     && !error && dp == NULL && ndp == NULL && !recursed) {
719                     do {
720                         ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
721                             newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
722                             cred, p, syscred, 1);
723                         if (ret == NFSERR_DELAY)
724                             (void) nfs_catnap(PZERO, ret, "nfs_open2");
725                     } while (ret == NFSERR_DELAY);
726                     if (ret) {
727                         if (ndp != NULL) {
728                                 free(ndp, M_NFSCLDELEG);
729                                 ndp = NULL;
730                         }
731                         if (ret == NFSERR_STALECLIENTID ||
732                             ret == NFSERR_STALEDONTRECOVER ||
733                             ret == NFSERR_BADSESSION)
734                                 error = ret;
735                     }
736                 }
737         }
738         if (nd->nd_repstat != 0 && error == 0)
739                 error = nd->nd_repstat;
740         if (error == NFSERR_STALECLIENTID)
741                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
742 nfsmout:
743         if (!error)
744                 *dpp = ndp;
745         else if (ndp != NULL)
746                 free(ndp, M_NFSCLDELEG);
747         m_freem(nd->nd_mrep);
748         return (error);
749 }
750
751 /*
752  * open downgrade rpc
753  */
754 int
755 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
756     struct ucred *cred, NFSPROC_T *p)
757 {
758         u_int32_t *tl;
759         struct nfsrv_descript nfsd, *nd = &nfsd;
760         int error;
761
762         NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp, cred);
763         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
764         if (NFSHASNFSV4N(VFSTONFS(vp->v_mount)))
765                 *tl++ = 0;
766         else
767                 *tl++ = op->nfso_stateid.seqid;
768         *tl++ = op->nfso_stateid.other[0];
769         *tl++ = op->nfso_stateid.other[1];
770         *tl++ = op->nfso_stateid.other[2];
771         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
772         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
773         *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
774         error = nfscl_request(nd, vp, p, cred);
775         if (error)
776                 return (error);
777         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
778         if (!nd->nd_repstat) {
779                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
780                 op->nfso_stateid.seqid = *tl++;
781                 op->nfso_stateid.other[0] = *tl++;
782                 op->nfso_stateid.other[1] = *tl++;
783                 op->nfso_stateid.other[2] = *tl;
784         }
785         if (nd->nd_repstat && error == 0)
786                 error = nd->nd_repstat;
787         if (error == NFSERR_STALESTATEID)
788                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
789 nfsmout:
790         m_freem(nd->nd_mrep);
791         return (error);
792 }
793
794 /*
795  * V4 Close operation.
796  */
797 int
798 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
799 {
800         struct nfsclclient *clp;
801         int error;
802
803         if (vp->v_type != VREG)
804                 return (0);
805         if (doclose)
806                 error = nfscl_doclose(vp, &clp, p);
807         else {
808                 error = nfscl_getclose(vp, &clp);
809                 if (error == 0)
810                         nfscl_clientrelease(clp);
811         }
812         return (error);
813 }
814
815 /*
816  * Close the open.
     *
     * For every lock owner on op->nfso_lock, the byte range locks are
     * unlocked via nfsrpc_locku() and freed, then nfsrpc_rellockown() is
     * called for the lock owner.  After that nfscl_tryclose() is called for
     * the open (repeated while it returns NFSERR_GRACE), the lock owners are
     * freed and, when "freeop" is true and the result is not NFSERR_DELAY,
     * the open itself is freed with nfscl_freeopen().
     *
     * "loop_on_delayed" is passed through unchanged to nfscl_tryclose().
     * The value returned is the last result of nfscl_tryclose(); errors from
     * the unlock and release-lock-owner RPCs are not returned.
817  */
818 int
819 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p,
820     bool loop_on_delayed, bool freeop)
821 {
822         struct nfsrv_descript nfsd, *nd = &nfsd;
823         struct nfscllockowner *lp, *nlp;
824         struct nfscllock *lop, *nlop;
825         struct ucred *tcred;
826         u_int64_t off = 0, len = 0;
827         u_int32_t type = NFSV4LOCKT_READ;
828         int error, do_unlock, trycnt;
829         bool own_not_null;
830
            /* Work with a private copy of the credentials stored in the open. */
831         tcred = newnfs_getcred();
832         newnfs_copycred(&op->nfso_cred, tcred);
833         /*
834          * (Theoretically this could be done in the same
835          *  compound as the close, but having multiple
836          *  sequenced Ops in the same compound might be
837          *  too scary for some servers.)
838          */
839         if (op->nfso_posixlock) {
840                 off = 0;
841                 len = NFS64BITSSET;
842                 type = NFSV4LOCKT_READ;
843         }
844
845         /*
846          * Since this function is only called from VOP_INACTIVE(), no
847          * other thread will be manipulating this Open. As such, the
848          * lock lists are not being changed by other threads, so it should
849          * be safe to do this without locking.
850          */
851         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
852                 do_unlock = 1;
853                 LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
                            /*
                             * Without POSIX lock semantics each lock is
                             * unlocked using its own offset, length and type.
                             */
854                         if (op->nfso_posixlock == 0) {
855                                 off = lop->nfslo_first;
856                                 len = lop->nfslo_end - lop->nfslo_first;
857                                 if (lop->nfslo_type == F_WRLCK)
858                                         type = NFSV4LOCKT_WRITE;
859                                 else
860                                         type = NFSV4LOCKT_READ;
861                         }
862                         if (do_unlock) {
                                    /*
                                     * Retry while the server answers
                                     * NFSERR_GRACE or NFSERR_DELAY; trycnt
                                     * bounds the number of retries.
                                     */
863                                 trycnt = 0;
864                                 do {
865                                         error = nfsrpc_locku(nd, nmp, lp, off,
866                                             len, type, tcred, p, 0);
867                                         if ((nd->nd_repstat == NFSERR_GRACE ||
868                                             nd->nd_repstat == NFSERR_DELAY) &&
869                                             error == 0)
870                                                 (void) nfs_catnap(PZERO,
871                                                     (int)nd->nd_repstat,
872                                                     "nfs_close");
873                                 } while ((nd->nd_repstat == NFSERR_GRACE ||
874                                     nd->nd_repstat == NFSERR_DELAY) &&
875                                     error == 0 && trycnt++ < 5);
                                    /*
                                     * With POSIX locks the unlock just sent
                                     * used off 0 / len NFS64BITSSET, so no
                                     * further unlock RPCs are issued for
                                     * this lock owner.
                                     */
876                                 if (op->nfso_posixlock)
877                                         do_unlock = 0;
878                         }
879                         nfscl_freelock(lop, 0);
880                 }
881                 /*
882                  * Do a ReleaseLockOwner.
883                  * The lock owner name nfsl_owner may be used by other opens for
884                  * other files but the lock_owner4 name that nfsrpc_rellockown()
885                  * puts on the wire has the file handle for this file appended
886                  * to it, so it can be done now.
887                  */
888                 (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
889                     lp->nfsl_open->nfso_fhlen, tcred, p);
890         }
891
892         /*
893          * There could be other Opens for different files on the same
894          * OpenOwner, so locking is required.
895          */
896         own_not_null = false;
897         if (op->nfso_own != NULL) {
898                 own_not_null = true;
899                 NFSLOCKCLSTATE();
900                 nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
901                 NFSUNLOCKCLSTATE();
902         }
903         do {
904                 error = nfscl_tryclose(op, tcred, nmp, p, loop_on_delayed);
905                 if (error == NFSERR_GRACE)
906                         (void) nfs_catnap(PZERO, error, "nfs_close");
907         } while (error == NFSERR_GRACE);
            /*
             * The NFSLOCKCLSTATE() taken here stays held across the frees
             * below and is only dropped by the NFSUNLOCKCLSTATE() near the
             * end of the function.
             */
908         if (own_not_null) {
909                 NFSLOCKCLSTATE();
910                 nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
911         }
912
913         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
914                 nfscl_freelockowner(lp, 0);
            /* The open is kept when the close result was NFSERR_DELAY. */
915         if (freeop && error != NFSERR_DELAY)
916                 nfscl_freeopen(op, 0, true);
917         if (own_not_null)
918                 NFSUNLOCKCLSTATE();
919         NFSFREECRED(tcred);
920         return (error);
921 }
922
923 /*
924  * The actual Close RPC.
925  */
926 int
927 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
928     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
929     int syscred)
930 {
931         u_int32_t *tl;
932         int error;
933
934         nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
935             op->nfso_fhlen, NULL, NULL, 0, 0, cred);
936         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
937         if (NFSHASNFSV4N(nmp)) {
938                 *tl++ = 0;
939                 *tl++ = 0;
940         } else {
941                 *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
942                 *tl++ = op->nfso_stateid.seqid;
943         }
944         *tl++ = op->nfso_stateid.other[0];
945         *tl++ = op->nfso_stateid.other[1];
946         *tl = op->nfso_stateid.other[2];
947         if (syscred)
948                 nd->nd_flag |= ND_USEGSSNAME;
949         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
950             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
951         if (error)
952                 return (error);
953         if (!NFSHASNFSV4N(nmp))
954                 NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
955         if (nd->nd_repstat == 0)
956                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
957         error = nd->nd_repstat;
958         if (!NFSHASNFSV4N(nmp) && error == NFSERR_STALESTATEID)
959                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
960 nfsmout:
961         m_freem(nd->nd_mrep);
962         return (error);
963 }
964
965 /*
966  * V4 Open Confirm RPC.
967  */
968 int
969 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
970     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
971 {
972         u_int32_t *tl;
973         struct nfsrv_descript nfsd, *nd = &nfsd;
974         struct nfsmount *nmp;
975         int error;
976
977         nmp = VFSTONFS(vp->v_mount);
978         if (NFSHASNFSV4N(nmp))
979                 return (0);             /* No confirmation for NFSv4.1. */
980         nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
981             0, 0, NULL);
982         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
983         *tl++ = op->nfso_stateid.seqid;
984         *tl++ = op->nfso_stateid.other[0];
985         *tl++ = op->nfso_stateid.other[1];
986         *tl++ = op->nfso_stateid.other[2];
987         *tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
988         error = nfscl_request(nd, vp, p, cred);
989         if (error)
990                 return (error);
991         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
992         if (!nd->nd_repstat) {
993                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
994                 op->nfso_stateid.seqid = *tl++;
995                 op->nfso_stateid.other[0] = *tl++;
996                 op->nfso_stateid.other[1] = *tl++;
997                 op->nfso_stateid.other[2] = *tl;
998         }
999         error = nd->nd_repstat;
1000         if (error == NFSERR_STALESTATEID)
1001                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
1002 nfsmout:
1003         m_freem(nd->nd_mrep);
1004         return (error);
1005 }
1006
1007 /*
1008  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
1009  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
      *
      * When NFSHASNFSV4N() is true for the mount, a session is set up instead:
      * if "retokp" is non-NULL and the mount already has a session, a
      * CreateSession using the existing ClientID is tried first; otherwise (or
      * if that fails and *retokp was false) an ExchangeID followed by a
      * CreateSession is done.  A new session is inserted at the head of
      * nmp->nm_sess and, when "reclaim" is 0, nfsrpc_reclaimcomplete() is
      * called.
      *
      * "retokp" may be NULL.  *retokp is set to true when the CreateSession
      * with the existing ClientID succeeds.  If *retokp is already true and
      * that attempt fails or is not possible, NFSERR_IO is returned.
      *
      * Returns 0 on success, otherwise an error from the RPC layer or the
      * reply status of the failing operation.
1010  */
1011 int
1012 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
1013     bool *retokp, struct ucred *cred, NFSPROC_T *p)
1014 {
1015         u_int32_t *tl;
1016         struct nfsrv_descript nfsd;
1017         struct nfsrv_descript *nd = &nfsd;
1018         u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
1019         u_short port;
1020         int error, isinet6 = 0, callblen;
1021         nfsquad_t confirm;
             /*
              * NOTE(review): "rev" is a function-level static bumped with no
              * locking visible here; presumably callers are serialized - confirm.
              */
1022         static u_int32_t rev = 0;
1023         struct nfsclds *dsp, *odsp;
1024         struct in6_addr a6;
1025         struct nfsclsession *tsep;
1026         struct rpc_reconupcall recon;
1027         struct nfscl_reconarg *rcp;
1028
1029         if (nfsboottime.tv_sec == 0)
1030                 NFSSETBOOTTIME(nfsboottime);
             /* Session based path; the NFSv4.0 code follows this branch. */
1031         if (NFSHASNFSV4N(nmp)) {
                     /*
                      * Preset to an error so that the ExchangeID path below is
                      * taken unless the CreateSession with the existing ClientID
                      * succeeds.
                      */
1032                 error = NFSERR_BADSESSION;
1033                 odsp = dsp = NULL;
1034                 if (retokp != NULL) {
1035                         NFSLOCKMNT(nmp);
1036                         odsp = TAILQ_FIRST(&nmp->nm_sess);
1037                         NFSUNLOCKMNT(nmp);
1038                 }
1039                 if (odsp != NULL) {
1040                         /*
1041                          * When a session already exists, first try a
1042                          * CreateSession with the extant ClientID.
1043                          */
1044                         dsp = malloc(sizeof(struct nfsclds) +
1045                             odsp->nfsclds_servownlen + 1, M_NFSCLDS,
1046                             M_WAITOK | M_ZERO);
1047                         dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
1048                         dsp->nfsclds_servownlen = odsp->nfsclds_servownlen;
1049                         dsp->nfsclds_sess.nfsess_clientid =
1050                             odsp->nfsclds_sess.nfsess_clientid;
1051                         dsp->nfsclds_sess.nfsess_sequenceid =
1052                             odsp->nfsclds_sess.nfsess_sequenceid + 1;
1053                         dsp->nfsclds_flags = odsp->nfsclds_flags;
1054                         if (dsp->nfsclds_servownlen > 0)
1055                                 memcpy(dsp->nfsclds_serverown,
1056                                     odsp->nfsclds_serverown,
1057                                     dsp->nfsclds_servownlen + 1);
1058                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
1059                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
1060                             NULL, MTX_DEF);
1061                         nfscl_initsessionslots(&dsp->nfsclds_sess);
1062                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
1063                             &nmp->nm_sockreq, NULL,
1064                             dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
1065                         NFSCL_DEBUG(1, "create session for extant "
1066                             "ClientID=%d\n", error);
1067                         if (error != 0) {
1068                                 nfscl_freenfsclds(dsp);
1069                                 dsp = NULL;
1070                                 /*
1071                                  * If *retokp is true, return any error other
1072                                  * than NFSERR_STALECLIENTID,
1073                                  * NFSERR_BADSESSION or NFSERR_STALEDONTRECOVER
1074                                  * so that nfscl_recover() will not loop.
1075                                  */
1076                                 if (*retokp)
1077                                         return (NFSERR_IO);
1078                         } else
1079                                 *retokp = true;
1080                 } else if (retokp != NULL && *retokp)
1081                         return (NFSERR_IO);
1082                 if (error != 0) {
1083                         /*
1084                          * Either there was no previous session or the
1085                          * CreateSession attempt failed, so...
1086                          * do an ExchangeID followed by the CreateSession.
1087                          */
1088                         clp->nfsc_rev = rev++;
1089                         error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0,
1090                             NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp,
1091                             cred, p);
1092                         NFSCL_DEBUG(1, "aft exch=%d\n", error);
1093                         if (error == 0)
1094                                 error = nfsrpc_createsession(nmp,
1095                                     &dsp->nfsclds_sess, &nmp->nm_sockreq, NULL,
1096                                     dsp->nfsclds_sess.nfsess_sequenceid, 1,
1097                                     cred, p);
1098                         NFSCL_DEBUG(1, "aft createsess=%d\n", error);
1099                 }
1100                 if (error == 0) {
1101                         /*
1102                          * If the session supports a backchannel, set up
1103                          * the BindConnectionToSession call in the krpc
1104                          * so that it is done on a reconnection.
1105                          */
1106                         if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0) {
1107                                 rcp = mem_alloc(sizeof(*rcp));
1108                                 rcp->minorvers = nmp->nm_minorvers;
1109                                 memcpy(rcp->sessionid,
1110                                     dsp->nfsclds_sess.nfsess_sessionid,
1111                                     NFSX_V4SESSIONID);
1112                                 recon.call = nfsrpc_bindconnsess;
1113                                 recon.arg = rcp;
1114                                 CLNT_CONTROL(nmp->nm_client, CLSET_RECONUPCALL,
1115                                     &recon);
1116                         }
1117
1118                         NFSLOCKMNT(nmp);
1119                         /*
1120                          * The old sessions cannot be safely free'd
1121                          * here, since they may still be used by
1122                          * in-progress RPCs.
1123                          */
1124                         tsep = NULL;
1125                         if (TAILQ_FIRST(&nmp->nm_sess) != NULL) {
1126                                 /*
1127                                  * Mark the old session defunct.  Needed
1128                                  * when called from nfscl_hasexpired().
1129                                  */
1130                                 tsep = NFSMNT_MDSSESSION(nmp);
1131                                 tsep->nfsess_defunct = 1;
1132                         }
1133                         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
1134                             nfsclds_list);
1135                         /*
1136                          * Wake up RPCs waiting for a slot on the
1137                          * old session. These will then fail with
1138                          * NFSERR_BADSESSION and be retried with the
1139                          * new session by nfsv4_setsequence().
1140                          * Also wakeup() processes waiting for the
1141                          * new session.
1142                          */
1143                         if (tsep != NULL)
1144                                 wakeup(&tsep->nfsess_slots);
1145                         wakeup(&nmp->nm_sess);
1146                         NFSUNLOCKMNT(nmp);
1147                 } else if (dsp != NULL)
1148                         nfscl_freenfsclds(dsp);
1149                 if (error == 0 && reclaim == 0) {
1150                         error = nfsrpc_reclaimcomplete(nmp, cred, p);
1151                         NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
1152                         if (error == NFSERR_COMPLETEALREADY ||
1153                             error == NFSERR_NOTSUPP)
1154                                 /* Ignore this error. */
1155                                 error = 0;
1156                 }
1157                 return (error);
1158         } else if (retokp != NULL && *retokp)
1159                 return (NFSERR_IO);
1160         clp->nfsc_rev = rev++;
1161
1162         /*
1163          * Allocate a single session structure for NFSv4.0, because some of
1164          * the fields are used by NFSv4.0 although it doesn't do a session.
1165          */
1166         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
1167         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
1168         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
1169         NFSLOCKMNT(nmp);
1170         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
1171         tsep = NFSMNT_MDSSESSION(nmp);
1172         NFSUNLOCKMNT(nmp);
1173
             /* SetClientID arguments: boot time, revision, client id string. */
1174         nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0,
1175             NULL);
1176         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1177         *tl++ = txdr_unsigned(nfsboottime.tv_sec);
1178         *tl = txdr_unsigned(clp->nfsc_rev);
1179         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
1180
1181         /*
1182          * set up the callback address
              *
              * The address string is either nfsv4_callbackaddr (when set) or
              * the address returned by nfscl_getmyip(), followed by the two
              * bytes of the network-order callback port as ".%d.%d".  When
              * callbacks are disabled or no address is available, "tcp" and
              * "0.0.0.0.0.0" are sent instead.
1183          */
1184         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1185         *tl = txdr_unsigned(NFS_CALLBCKPROG);
1186         callblen = strlen(nfsv4_callbackaddr);
1187         if (callblen == 0)
1188                 cp = nfscl_getmyip(nmp, &a6, &isinet6);
1189         if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
1190             (callblen > 0 || cp != NULL)) {
1191                 port = htons(nfsv4_cbport);
1192                 cp2 = (u_int8_t *)&port;
1193 #ifdef INET6
1194                 if ((callblen > 0 &&
1195                      strchr(nfsv4_callbackaddr, ':')) || isinet6) {
1196                         char ip6buf[INET6_ADDRSTRLEN], *ip6add;
1197
1198                         (void) nfsm_strtom(nd, "tcp6", 4);
1199                         if (callblen == 0) {
1200                                 ip6_sprintf(ip6buf, (struct in6_addr *)cp);
1201                                 ip6add = ip6buf;
1202                         } else {
1203                                 ip6add = nfsv4_callbackaddr;
1204                         }
1205                         snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
1206                             ip6add, cp2[0], cp2[1]);
1207                 } else
1208 #endif
1209                 {
1210                         (void) nfsm_strtom(nd, "tcp", 3);
1211                         if (callblen == 0)
1212                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1213                                     "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
1214                                     cp[2], cp[3], cp2[0], cp2[1]);
1215                         else
1216                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1217                                     "%s.%d.%d", nfsv4_callbackaddr,
1218                                     cp2[0], cp2[1]);
1219                 }
1220                 (void) nfsm_strtom(nd, addr, strlen(addr));
1221         } else {
1222                 (void) nfsm_strtom(nd, "tcp", 3);
1223                 (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
1224         }
1225         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1226         *tl = txdr_unsigned(clp->nfsc_cbident);
1227         nd->nd_flag |= ND_USEGSSNAME;
1228         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1229                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1230         if (error)
1231                 return (error);
1232         if (nd->nd_repstat == 0) {
                 /*
                  * SetClientID succeeded: save the clientid in the session
                  * structure and keep the confirm verifier for the
                  * SetClientIDConfirm that follows.
                  */
1233             NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1234             tsep->nfsess_clientid.lval[0] = *tl++;
1235             tsep->nfsess_clientid.lval[1] = *tl++;
1236             confirm.lval[0] = *tl++;
1237             confirm.lval[1] = *tl;
                 /* Done with this reply; nd is reused for the next request. */
1238             m_freem(nd->nd_mrep);
1239             nd->nd_mrep = NULL;
1240
1241             /*
1242              * and confirm it.
1243              */
1244             nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1245                 NULL, 0, 0, NULL);
1246             NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1247             *tl++ = tsep->nfsess_clientid.lval[0];
1248             *tl++ = tsep->nfsess_clientid.lval[1];
1249             *tl++ = confirm.lval[0];
1250             *tl = confirm.lval[1];
1251             nd->nd_flag |= ND_USEGSSNAME;
1252             error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1253                 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1254             if (error)
1255                 return (error);
                 /*
                  * Clear nd_mrep after freeing it, since the code falls
                  * through to another m_freem(nd->nd_mrep) at nfsmout.
                  */
1256             m_freem(nd->nd_mrep);
1257             nd->nd_mrep = NULL;
1258         }
             /* Reply status of the last request sent above. */
1259         error = nd->nd_repstat;
1260 nfsmout:
1261         m_freem(nd->nd_mrep);
1262         return (error);
1263 }
1264
1265 /*
1266  * nfs getattr call.
1267  */
1268 int
1269 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1270     struct nfsvattr *nap)
1271 {
1272         struct nfsrv_descript nfsd, *nd = &nfsd;
1273         int error;
1274         nfsattrbit_t attrbits;
1275         struct nfsnode *np;
1276         struct nfsmount *nmp;
1277
1278         nmp = VFSTONFS(vp->v_mount);
1279         np = VTONFS(vp);
1280         if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
1281             nmp->nm_fhsize == 0) {
1282                 /* Attempt to get the actual root file handle. */
1283                 error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, p);
1284                 if (error != 0)
1285                         return (EACCES);
1286                 if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
1287                         nfscl_statfs(vp, cred, p);
1288         }
1289         NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
1290         if (nd->nd_flag & ND_NFSV4) {
1291                 NFSGETATTR_ATTRBIT(&attrbits);
1292                 (void) nfsrv_putattrbit(nd, &attrbits);
1293         }
1294         error = nfscl_request(nd, vp, p, cred);
1295         if (error)
1296                 return (error);
1297         if (!nd->nd_repstat)
1298                 error = nfsm_loadattr(nd, nap);
1299         else
1300                 error = nd->nd_repstat;
1301         m_freem(nd->nd_mrep);
1302         return (error);
1303 }
1304
1305 /*
1306  * nfs getattr call with non-vnode arguments.
1307  */
1308 int
1309 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1310     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1311     uint32_t *leasep)
1312 {
1313         struct nfsrv_descript nfsd, *nd = &nfsd;
1314         int error, vers = NFS_VER2;
1315         nfsattrbit_t attrbits;
1316
1317         nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0,
1318             cred);
1319         if (nd->nd_flag & ND_NFSV4) {
1320                 vers = NFS_VER4;
1321                 NFSGETATTR_ATTRBIT(&attrbits);
1322                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1323                 (void) nfsrv_putattrbit(nd, &attrbits);
1324         } else if (nd->nd_flag & ND_NFSV3) {
1325                 vers = NFS_VER3;
1326         }
1327         if (syscred)
1328                 nd->nd_flag |= ND_USEGSSNAME;
1329         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1330             NFS_PROG, vers, NULL, 1, xidp, NULL);
1331         if (error)
1332                 return (error);
1333         if (nd->nd_repstat == 0) {
1334                 if ((nd->nd_flag & ND_NFSV4) != 0)
1335                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1336                             NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1337                             NULL, NULL);
1338                 else
1339                         error = nfsm_loadattr(nd, nap);
1340         } else
1341                 error = nd->nd_repstat;
1342         m_freem(nd->nd_mrep);
1343         return (error);
1344 }
1345
1346 /*
1347  * Do an nfs setattr operation.
1348  */
1349 int
1350 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1351     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp)
1352 {
1353         int error, expireret = 0, openerr, retrycnt;
1354         u_int32_t clidrev = 0, mode;
1355         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1356         struct nfsfh *nfhp;
1357         nfsv4stateid_t stateid;
1358         void *lckp;
1359
1360         if (nmp->nm_clp != NULL)
1361                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1362         if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1363                 mode = NFSV4OPEN_ACCESSWRITE;
1364         else
1365                 mode = NFSV4OPEN_ACCESSREAD;
1366         retrycnt = 0;
1367         do {
1368                 lckp = NULL;
1369                 openerr = 1;
1370                 if (NFSHASNFSV4(nmp)) {
1371                         nfhp = VTONFS(vp)->n_fhp;
1372                         error = nfscl_getstateid(vp, nfhp->nfh_fh,
1373                             nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1374                         if (error && vp->v_type == VREG &&
1375                             (mode == NFSV4OPEN_ACCESSWRITE ||
1376                              nfstest_openallsetattr)) {
1377                                 /*
1378                                  * No Open stateid, so try and open the file
1379                                  * now.
1380                                  */
1381                                 if (mode == NFSV4OPEN_ACCESSWRITE)
1382                                         openerr = nfsrpc_open(vp, FWRITE, cred,
1383                                             p);
1384                                 else
1385                                         openerr = nfsrpc_open(vp, FREAD, cred,
1386                                             p);
1387                                 if (!openerr)
1388                                         (void) nfscl_getstateid(vp,
1389                                             nfhp->nfh_fh, nfhp->nfh_len,
1390                                             mode, 0, cred, p, &stateid, &lckp);
1391                         }
1392                 }
1393                 if (vap != NULL)
1394                         error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1395                             rnap, attrflagp);
1396                 else
1397                         error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid);
1398                 if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1399                         NFSLOCKMNT(nmp);
1400                         nmp->nm_state |= NFSSTA_OPENMODE;
1401                         NFSUNLOCKMNT(nmp);
1402                 }
1403                 if (error == NFSERR_STALESTATEID)
1404                         nfscl_initiate_recovery(nmp->nm_clp);
1405                 if (lckp != NULL)
1406                         nfscl_lockderef(lckp);
1407                 if (!openerr)
1408                         (void) nfsrpc_close(vp, 0, p);
1409                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1410                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1411                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1412                         (void) nfs_catnap(PZERO, error, "nfs_setattr");
1413                 } else if ((error == NFSERR_EXPIRED ||
1414                     ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1415                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
1416                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1417                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1418                     NFSHASNFSV4N(nmp)) {
1419                         error = EIO;
1420                 }
1421                 retrycnt++;
1422         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1423             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1424             error == NFSERR_BADSESSION ||
1425             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1426             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1427              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1428             (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1429              retrycnt < 4));
1430         if (error && retrycnt >= 4)
1431                 error = EIO;
1432         return (error);
1433 }
1434
1435 static int
1436 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1437     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1438     struct nfsvattr *rnap, int *attrflagp)
1439 {
1440         u_int32_t *tl;
1441         struct nfsrv_descript nfsd, *nd = &nfsd;
1442         int error;
1443         nfsattrbit_t attrbits;
1444
1445         *attrflagp = 0;
1446         NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp, cred);
1447         if (nd->nd_flag & ND_NFSV4)
1448                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1449         vap->va_type = vp->v_type;
1450         nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1451         if (nd->nd_flag & ND_NFSV3) {
1452                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1453                 *tl = newnfs_false;
1454         } else if (nd->nd_flag & ND_NFSV4) {
1455                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1456                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1457                 NFSGETATTR_ATTRBIT(&attrbits);
1458                 (void) nfsrv_putattrbit(nd, &attrbits);
1459         }
1460         error = nfscl_request(nd, vp, p, cred);
1461         if (error)
1462                 return (error);
1463         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1464                 error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, NULL);
1465         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1466                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1467         if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1468                 error = nfscl_postop_attr(nd, rnap, attrflagp);
1469         m_freem(nd->nd_mrep);
1470         if (nd->nd_repstat && !error)
1471                 error = nd->nd_repstat;
1472         return (error);
1473 }
1474
1475 /*
1476  * nfs lookup rpc
1477  */
1478 int
1479 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1480     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1481     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, uint32_t openmode)
1482 {
1483         uint32_t deleg, rflags, *tl;
1484         struct nfsrv_descript nfsd, *nd = &nfsd;
1485         struct nfsmount *nmp;
1486         struct nfsnode *np;
1487         struct nfsfh *nfhp;
1488         nfsattrbit_t attrbits;
1489         int error = 0, lookupp = 0, newone, ret, retop;
1490         uint8_t own[NFSV4CL_LOCKNAMELEN];
1491         struct nfsclopen *op;
1492         struct nfscldeleg *ndp;
1493         nfsv4stateid_t stateid;
1494
1495         *attrflagp = 0;
1496         *dattrflagp = 0;
1497         if (dvp->v_type != VDIR)
1498                 return (ENOTDIR);
1499         nmp = VFSTONFS(dvp->v_mount);
1500         if (len > NFS_MAXNAMLEN)
1501                 return (ENAMETOOLONG);
1502         if (NFSHASNFSV4(nmp) && len == 1 &&
1503                 name[0] == '.') {
1504                 /*
1505                  * Just return the current dir's fh.
1506                  */
1507                 np = VTONFS(dvp);
1508                 nfhp = malloc(sizeof (struct nfsfh) +
1509                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1510                 nfhp->nfh_len = np->n_fhp->nfh_len;
1511                 NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1512                 *nfhpp = nfhp;
1513                 return (0);
1514         }
1515         if (NFSHASNFSV4(nmp) && len == 2 &&
1516                 name[0] == '.' && name[1] == '.') {
1517                 lookupp = 1;
1518                 openmode = 0;
1519                 NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp, cred);
1520         } else if (openmode != 0) {
1521                 NFSCL_REQSTART(nd, NFSPROC_LOOKUPOPEN, dvp, cred);
1522                 nfsm_strtom(nd, name, len);
1523         } else {
1524                 NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp, cred);
1525                 (void) nfsm_strtom(nd, name, len);
1526         }
1527         if (nd->nd_flag & ND_NFSV4) {
1528                 NFSGETATTR_ATTRBIT(&attrbits);
1529                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1530                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
1531                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1532                 (void) nfsrv_putattrbit(nd, &attrbits);
1533                 if (openmode != 0) {
1534                         /* Test for a VREG file. */
1535                         NFSZERO_ATTRBIT(&attrbits);
1536                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
1537                         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
1538                         *tl = txdr_unsigned(NFSV4OP_VERIFY);
1539                         nfsrv_putattrbit(nd, &attrbits);
1540                         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1541                         *tl++ = txdr_unsigned(NFSX_UNSIGNED);
1542                         *tl = vtonfsv34_type(VREG);
1543
1544                         /* Attempt the Open for VREG. */
1545                         nfscl_filllockowner(NULL, own, F_POSIX);
1546                         NFSM_BUILD(tl, uint32_t *, 6 * NFSX_UNSIGNED);
1547                         *tl++ = txdr_unsigned(NFSV4OP_OPEN);
1548                         *tl++ = 0;              /* seqid, ignored. */
1549                         *tl++ = txdr_unsigned(openmode);
1550                         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
1551                         *tl++ = 0;              /* ClientID, ignored. */
1552                         *tl = 0;
1553                         nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN);
1554                         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1555                         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
1556                         *tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
1557                 }
1558         }
1559         error = nfscl_request(nd, dvp, p, cred);
1560         if (error)
1561                 return (error);
1562         ndp = NULL;
1563         if (nd->nd_repstat) {
1564                 /*
1565                  * When an NFSv4 Lookupp returns ENOENT, it means that
1566                  * the lookup is at the root of an fs, so return this dir.
1567                  */
1568                 if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1569                     np = VTONFS(dvp);
1570                     nfhp = malloc(sizeof (struct nfsfh) +
1571                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1572                     nfhp->nfh_len = np->n_fhp->nfh_len;
1573                     NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1574                     *nfhpp = nfhp;
1575                     m_freem(nd->nd_mrep);
1576                     return (0);
1577                 }
1578                 if (nd->nd_flag & ND_NFSV3)
1579                     error = nfscl_postop_attr(nd, dnap, dattrflagp);
1580                 else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1581                     ND_NFSV4) {
1582                         /* Load the directory attributes. */
1583                         error = nfsm_loadattr(nd, dnap);
1584                         if (error != 0)
1585                                 goto nfsmout;
1586                         *dattrflagp = 1;
1587                 }
1588                 /* Check Lookup operation reply status. */
1589                 if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
1590                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1591                         if (*++tl != 0)
1592                                 goto nfsmout;
1593                 }
1594                 /* Look for GetFH reply. */
1595                 if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
1596                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1597                         if (*++tl != 0)
1598                                 goto nfsmout;
1599                         error = nfsm_getfh(nd, nfhpp);
1600                         if (error)
1601                                 goto nfsmout;
1602                 }
1603                 /* Look for Getattr reply. */
1604                 if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
1605                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
1606                         if (*++tl != 0)
1607                                 goto nfsmout;
1608                         error = nfsm_loadattr(nd, nap);
1609                         if (error == 0) {
1610                                 /*
1611                                  * We have now successfully completed the
1612                                  * lookup, so set nd_repstat to 0.
1613                                  */
1614                                 nd->nd_repstat = 0;
1615                                 *attrflagp = 1;
1616                         }
1617                 }
1618                 goto nfsmout;
1619         }
1620         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1621                 /* Load the directory attributes. */
1622                 error = nfsm_loadattr(nd, dnap);
1623                 if (error != 0)
1624                         goto nfsmout;
1625                 *dattrflagp = 1;
1626                 /* Skip over the Lookup and GetFH operation status values. */
1627                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1628         }
1629         error = nfsm_getfh(nd, nfhpp);
1630         if (error)
1631                 goto nfsmout;
1632
1633         error = nfscl_postop_attr(nd, nap, attrflagp);
1634         if (openmode != 0 && error == 0) {
1635                 NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID +
1636                     10 * NFSX_UNSIGNED);
1637                 tl += 4;        /* Skip over Verify+Open status. */
1638                 stateid.seqid = *tl++;
1639                 stateid.other[0] = *tl++;
1640                 stateid.other[1] = *tl++;
1641                 stateid.other[2] = *tl;
1642                 rflags = fxdr_unsigned(uint32_t, *(tl + 6));
1643                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1644                 if (error != 0)
1645                         goto nfsmout;
1646                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
1647                 deleg = fxdr_unsigned(uint32_t, *tl);
1648                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
1649                     deleg == NFSV4OPEN_DELEGATEWRITE) {
1650                         /*
1651                          * Just need to fill in the fields used by
1652                          * nfscl_trydelegreturn().
1653                          * Mark the mount point as acquiring
1654                          * delegations, so NFSPROC_LOOKUPOPEN will
1655                          * no longer be done.
1656                          */
1657                         NFSLOCKMNT(nmp);
1658                         nmp->nm_privflag |= NFSMNTP_DELEGISSUED;
1659                         NFSUNLOCKMNT(nmp);
1660                         ndp = malloc(sizeof(struct nfscldeleg) +
1661                             (*nfhpp)->nfh_len, M_NFSCLDELEG, M_WAITOK);
1662                         ndp->nfsdl_fhlen = (*nfhpp)->nfh_len;
1663                         NFSBCOPY((*nfhpp)->nfh_fh, ndp->nfsdl_fh,
1664                             ndp->nfsdl_fhlen);
1665                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
1666                         NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
1667                         ndp->nfsdl_stateid.seqid = *tl++;
1668                         ndp->nfsdl_stateid.other[0] = *tl++;
1669                         ndp->nfsdl_stateid.other[1] = *tl++;
1670                         ndp->nfsdl_stateid.other[2] = *tl++;
1671                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
1672                         error = NFSERR_BADXDR;
1673                         goto nfsmout;
1674                 }
1675                 ret = nfscl_open(dvp, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len,
1676                     openmode, 0, cred, p, NULL, &op, &newone, &retop, 1, true);
1677                 if (ret != 0)
1678                         goto nfsmout;
1679                 if (newone != 0) {
1680                         op->nfso_stateid.seqid = stateid.seqid;
1681                         op->nfso_stateid.other[0] = stateid.other[0];
1682                         op->nfso_stateid.other[1] = stateid.other[1];
1683                         op->nfso_stateid.other[2] = stateid.other[2];
1684                         op->nfso_mode = openmode;
1685                 } else {
1686                         op->nfso_stateid.seqid = stateid.seqid;
1687                         if (retop == NFSCLOPEN_DOOPEN)
1688                                 op->nfso_mode |= openmode;
1689                 }
1690                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
1691                     nfscl_assumeposixlocks)
1692                         op->nfso_posixlock = 1;
1693                 else
1694                         op->nfso_posixlock = 0;
1695                 nfscl_openrelease(nmp, op, 0, 0);
1696                 if (ndp != NULL) {
1697                         /*
1698                          * Since we do not have the vnode, we
1699                          * cannot invalidate cached attributes.
1700                          * Just return the delegation.
1701                          */
1702                         nfscl_trydelegreturn(ndp, cred, nmp, p);
1703                 }
1704         }
1705         if ((nd->nd_flag & ND_NFSV3) && !error)
1706                 error = nfscl_postop_attr(nd, dnap, dattrflagp);
1707 nfsmout:
1708         m_freem(nd->nd_mrep);
1709         if (!error && nd->nd_repstat)
1710                 error = nd->nd_repstat;
1711         free(ndp, M_NFSCLDELEG);
1712         return (error);
1713 }
1714
1715 /*
1716  * Do a readlink rpc.
1717  */
1718 int
1719 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1720     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
1721 {
1722         u_int32_t *tl;
1723         struct nfsrv_descript nfsd, *nd = &nfsd;
1724         struct nfsnode *np = VTONFS(vp);
1725         nfsattrbit_t attrbits;
1726         int error, len, cangetattr = 1;
1727
1728         *attrflagp = 0;
1729         NFSCL_REQSTART(nd, NFSPROC_READLINK, vp, cred);
1730         if (nd->nd_flag & ND_NFSV4) {
1731                 /*
1732                  * And do a Getattr op.
1733                  */
1734                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1735                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1736                 NFSGETATTR_ATTRBIT(&attrbits);
1737                 (void) nfsrv_putattrbit(nd, &attrbits);
1738         }
1739         error = nfscl_request(nd, vp, p, cred);
1740         if (error)
1741                 return (error);
1742         if (nd->nd_flag & ND_NFSV3)
1743                 error = nfscl_postop_attr(nd, nap, attrflagp);
1744         if (!nd->nd_repstat && !error) {
1745                 NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1746                 /*
1747                  * This seems weird to me, but must have been added to
1748                  * FreeBSD for some reason. The only thing I can think of
1749                  * is that there was/is some server that replies with
1750                  * more link data than it should?
1751                  */
1752                 if (len == NFS_MAXPATHLEN) {
1753                         NFSLOCKNODE(np);
1754                         if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1755                                 len = np->n_size;
1756                                 cangetattr = 0;
1757                         }
1758                         NFSUNLOCKNODE(np);
1759                 }
1760                 error = nfsm_mbufuio(nd, uiop, len);
1761                 if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1762                         error = nfscl_postop_attr(nd, nap, attrflagp);
1763         }
1764         if (nd->nd_repstat && !error)
1765                 error = nd->nd_repstat;
1766 nfsmout:
1767         m_freem(nd->nd_mrep);
1768         return (error);
1769 }
1770
1771 /*
1772  * Read operation.
1773  */
1774 int
1775 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1776     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
1777 {
1778         int error, expireret = 0, retrycnt;
1779         u_int32_t clidrev = 0;
1780         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1781         struct nfsnode *np = VTONFS(vp);
1782         struct ucred *newcred;
1783         struct nfsfh *nfhp = NULL;
1784         nfsv4stateid_t stateid;
1785         void *lckp;
1786
1787         if (nmp->nm_clp != NULL)
1788                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1789         newcred = cred;
1790         if (NFSHASNFSV4(nmp)) {
1791                 nfhp = np->n_fhp;
1792                 newcred = NFSNEWCRED(cred);
1793         }
1794         retrycnt = 0;
1795         do {
1796                 lckp = NULL;
1797                 if (NFSHASNFSV4(nmp))
1798                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1799                             NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1800                             &lckp);
1801                 error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1802                     attrflagp);
1803                 if (error == NFSERR_OPENMODE) {
1804                         NFSLOCKMNT(nmp);
1805                         nmp->nm_state |= NFSSTA_OPENMODE;
1806                         NFSUNLOCKMNT(nmp);
1807                 }
1808                 if (error == NFSERR_STALESTATEID)
1809                         nfscl_initiate_recovery(nmp->nm_clp);
1810                 if (lckp != NULL)
1811                         nfscl_lockderef(lckp);
1812                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1813                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1814                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1815                         (void) nfs_catnap(PZERO, error, "nfs_read");
1816                 } else if ((error == NFSERR_EXPIRED ||
1817                     ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1818                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
1819                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1820                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1821                     NFSHASNFSV4N(nmp)) {
1822                         error = EIO;
1823                 }
1824                 retrycnt++;
1825         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1826             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1827             error == NFSERR_BADSESSION ||
1828             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1829             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1830              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1831             (error == NFSERR_OPENMODE && retrycnt < 4));
1832         if (error && retrycnt >= 4)
1833                 error = EIO;
1834         if (NFSHASNFSV4(nmp))
1835                 NFSFREECRED(newcred);
1836         return (error);
1837 }
1838
1839 /*
1840  * The actual read RPC.
1841  */
1842 static int
1843 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1844     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1845     int *attrflagp)
1846 {
1847         u_int32_t *tl;
1848         int error = 0, len, retlen, tsiz, eof = 0;
1849         struct nfsrv_descript nfsd;
1850         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1851         struct nfsrv_descript *nd = &nfsd;
1852         int rsize;
1853         off_t tmp_off;
1854
1855         *attrflagp = 0;
1856         tsiz = uiop->uio_resid;
1857         tmp_off = uiop->uio_offset + tsiz;
1858         NFSLOCKMNT(nmp);
1859         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1860                 NFSUNLOCKMNT(nmp);
1861                 return (EFBIG);
1862         }
1863         rsize = nmp->nm_rsize;
1864         NFSUNLOCKMNT(nmp);
1865         nd->nd_mrep = NULL;
1866         while (tsiz > 0) {
1867                 *attrflagp = 0;
1868                 len = (tsiz > rsize) ? rsize : tsiz;
1869                 NFSCL_REQSTART(nd, NFSPROC_READ, vp, cred);
1870                 if (nd->nd_flag & ND_NFSV4)
1871                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1872                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1873                 if (nd->nd_flag & ND_NFSV2) {
1874                         *tl++ = txdr_unsigned(uiop->uio_offset);
1875                         *tl++ = txdr_unsigned(len);
1876                         *tl = 0;
1877                 } else {
1878                         txdr_hyper(uiop->uio_offset, tl);
1879                         *(tl + 2) = txdr_unsigned(len);
1880                 }
1881                 /*
1882                  * Since I can't do a Getattr for NFSv4 for Write, there
1883                  * doesn't seem any point in doing one here, either.
1884                  * (See the comment in nfsrpc_writerpc() for more info.)
1885                  */
1886                 error = nfscl_request(nd, vp, p, cred);
1887                 if (error)
1888                         return (error);
1889                 if (nd->nd_flag & ND_NFSV3) {
1890                         error = nfscl_postop_attr(nd, nap, attrflagp);
1891                 } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1892                         error = nfsm_loadattr(nd, nap);
1893                         if (!error)
1894                                 *attrflagp = 1;
1895                 }
1896                 if (nd->nd_repstat || error) {
1897                         if (!error)
1898                                 error = nd->nd_repstat;
1899                         goto nfsmout;
1900                 }
1901                 if (nd->nd_flag & ND_NFSV3) {
1902                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1903                         eof = fxdr_unsigned(int, *(tl + 1));
1904                 } else if (nd->nd_flag & ND_NFSV4) {
1905                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1906                         eof = fxdr_unsigned(int, *tl);
1907                 }
1908                 NFSM_STRSIZ(retlen, len);
1909                 error = nfsm_mbufuio(nd, uiop, retlen);
1910                 if (error)
1911                         goto nfsmout;
1912                 m_freem(nd->nd_mrep);
1913                 nd->nd_mrep = NULL;
1914                 tsiz -= retlen;
1915                 if (!(nd->nd_flag & ND_NFSV2)) {
1916                         if (eof || retlen == 0)
1917                                 tsiz = 0;
1918                 } else if (retlen < len)
1919                         tsiz = 0;
1920         }
1921         return (0);
1922 nfsmout:
1923         if (nd->nd_mrep != NULL)
1924                 m_freem(nd->nd_mrep);
1925         return (error);
1926 }
1927
1928 /*
1929  * nfs write operation
1930  * When called_from_strategy != 0, it should return EIO for an error that
1931  * indicates recovery is in progress, so that the buffer will be left
1932  * dirty and be written back to the server later. If it loops around,
1933  * the recovery thread could get stuck waiting for the buffer and recovery
1934  * will then deadlock.
1935  */
1936 int
1937 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1938     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1939     int called_from_strategy, int ioflag)
1940 {
1941         int error, expireret = 0, retrycnt, nostateid;
1942         u_int32_t clidrev = 0;
1943         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1944         struct nfsnode *np = VTONFS(vp);
1945         struct ucred *newcred;
1946         struct nfsfh *nfhp = NULL;
1947         nfsv4stateid_t stateid;
1948         void *lckp;
1949
1950         KASSERT(*must_commit >= 0 && *must_commit <= 2,
1951             ("nfsrpc_write: must_commit out of range=%d", *must_commit));
1952         if (nmp->nm_clp != NULL)
1953                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1954         newcred = cred;
1955         if (NFSHASNFSV4(nmp)) {
1956                 newcred = NFSNEWCRED(cred);
1957                 nfhp = np->n_fhp;
1958         }
1959         retrycnt = 0;
1960         do {
1961                 lckp = NULL;
1962                 nostateid = 0;
1963                 if (NFSHASNFSV4(nmp)) {
1964                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1965                             NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1966                             &lckp);
1967                         if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1968                             stateid.other[2] == 0) {
1969                                 nostateid = 1;
1970                                 NFSCL_DEBUG(1, "stateid0 in write\n");
1971                         }
1972                 }
1973
1974                 /*
1975                  * If there is no stateid for NFSv4, it means this is an
1976                  * extraneous write after close. Basically a poorly
1977                  * implemented buffer cache. Just don't do the write.
1978                  */
1979                 if (nostateid)
1980                         error = 0;
1981                 else
1982                         error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1983                             newcred, &stateid, p, nap, attrflagp, ioflag);
1984                 if (error == NFSERR_STALESTATEID)
1985                         nfscl_initiate_recovery(nmp->nm_clp);
1986                 if (lckp != NULL)
1987                         nfscl_lockderef(lckp);
1988                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1989                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1990                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1991                         (void) nfs_catnap(PZERO, error, "nfs_write");
1992                 } else if ((error == NFSERR_EXPIRED ||
1993                     ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1994                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
1995                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1996                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1997                     NFSHASNFSV4N(nmp)) {
1998                         error = EIO;
1999                 }
2000                 retrycnt++;
2001         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
2002             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
2003               error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
2004             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
2005             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2006              expireret == 0 && clidrev != 0 && retrycnt < 4));
2007         if (error != 0 && (retrycnt >= 4 ||
2008             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
2009               error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
2010                 error = EIO;
2011         if (NFSHASNFSV4(nmp))
2012                 NFSFREECRED(newcred);
2013         return (error);
2014 }
2015
2016 /*
2017  * The actual write RPC.
2018  */
2019 static int
2020 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
2021     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
2022     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int ioflag)
2023 {
2024         u_int32_t *tl;
2025         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2026         struct nfsnode *np = VTONFS(vp);
2027         int error = 0, len, rlen, commit, committed = NFSWRITE_FILESYNC;
2028         int wccflag = 0;
2029         int32_t backup;
2030         struct nfsrv_descript *nd;
2031         nfsattrbit_t attrbits;
2032         uint64_t tmp_off;
2033         ssize_t tsiz, wsize;
2034         bool do_append;
2035
2036         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
2037         *attrflagp = 0;
2038         tsiz = uiop->uio_resid;
2039         tmp_off = uiop->uio_offset + tsiz;
2040         NFSLOCKMNT(nmp);
2041         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
2042                 NFSUNLOCKMNT(nmp);
2043                 return (EFBIG);
2044         }
2045         wsize = nmp->nm_wsize;
2046         do_append = false;
2047         if ((ioflag & IO_APPEND) != 0 && NFSHASNFSV4(nmp) && !NFSHASPNFS(nmp))
2048                 do_append = true;
2049         NFSUNLOCKMNT(nmp);
2050         nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK);
2051         nd->nd_mrep = NULL;     /* NFSv2 sometimes does a write with */
2052         nd->nd_repstat = 0;     /* uio_resid == 0, so the while is not done */
2053         while (tsiz > 0) {
2054                 *attrflagp = 0;
2055                 len = (tsiz > wsize) ? wsize : tsiz;
2056                 if (do_append)
2057                         NFSCL_REQSTART(nd, NFSPROC_APPENDWRITE, vp, cred);
2058                 else
2059                         NFSCL_REQSTART(nd, NFSPROC_WRITE, vp, cred);
2060                 if (nd->nd_flag & ND_NFSV4) {
2061                         if (do_append) {
2062                                 NFSZERO_ATTRBIT(&attrbits);
2063                                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
2064                                 nfsrv_putattrbit(nd, &attrbits);
2065                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED +
2066                                     NFSX_HYPER);
2067                                 *tl++ = txdr_unsigned(NFSX_HYPER);
2068                                 txdr_hyper(uiop->uio_offset, tl); tl += 2;
2069                                 *tl = txdr_unsigned(NFSV4OP_WRITE);
2070                         }
2071                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
2072                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
2073                         txdr_hyper(uiop->uio_offset, tl);
2074                         tl += 2;
2075                         *tl++ = txdr_unsigned(*iomode);
2076                         *tl = txdr_unsigned(len);
2077                 } else if (nd->nd_flag & ND_NFSV3) {
2078                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
2079                         txdr_hyper(uiop->uio_offset, tl);
2080                         tl += 2;
2081                         *tl++ = txdr_unsigned(len);
2082                         *tl++ = txdr_unsigned(*iomode);
2083                         *tl = txdr_unsigned(len);
2084                 } else {
2085                         u_int32_t x;
2086
2087                         NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2088                         /*
2089                          * Not sure why someone changed this, since the
2090                          * RFC clearly states that "beginoffset" and
2091                          * "totalcount" are ignored, but it wouldn't
2092                          * surprise me if there's a busted server out there.
2093                          */
2094                         /* Set both "begin" and "current" to non-garbage. */
2095                         x = txdr_unsigned((u_int32_t)uiop->uio_offset);
2096                         *tl++ = x;      /* "begin offset" */
2097                         *tl++ = x;      /* "current offset" */
2098                         x = txdr_unsigned(len);
2099                         *tl++ = x;      /* total to this offset */
2100                         *tl = x;        /* size of this write */
2101                 }
2102                 error = nfsm_uiombuf(nd, uiop, len);
2103                 if (error != 0) {
2104                         m_freem(nd->nd_mreq);
2105                         free(nd, M_TEMP);
2106                         return (error);
2107                 }
2108                 /*
2109                  * Although it is tempting to do a normal Getattr Op in the
2110                  * NFSv4 compound, the result can be a nearly hung client
2111                  * system if the Getattr asks for Owner and/or OwnerGroup.
2112                  * It occurs when the client can't map either the Owner or
2113                  * Owner_group name in the Getattr reply to a uid/gid. When
2114                  * there is a cache miss, the kernel does an upcall to the
2115                  * nfsuserd. Then, it can try and read the local /etc/passwd
2116                  * or /etc/group file. It can then block in getnewbuf(),
2117                  * waiting for dirty writes to be pushed to the NFS server.
2118                  * The only reason this doesn't result in a complete
2119                  * deadlock, is that the upcall times out and allows
2120                  * the write to complete. However, progress is so slow
2121                  * that it might just as well be deadlocked.
2122                  * As such, we get the rest of the attributes, but not
2123                  * Owner or Owner_group.
2124                  * nb: nfscl_loadattrcache() needs to be told that these
2125                  *     partial attributes from a write rpc are being
2126                  *     passed in, via a argument flag.
2127                  */
2128                 if (nd->nd_flag & ND_NFSV4) {
2129                         NFSWRITEGETATTR_ATTRBIT(&attrbits);
2130                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2131                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2132                         (void) nfsrv_putattrbit(nd, &attrbits);
2133                 }
2134                 error = nfscl_request(nd, vp, p, cred);
2135                 if (error) {
2136                         free(nd, M_TEMP);
2137                         return (error);
2138                 }
2139                 if (nd->nd_repstat) {
2140                         /*
2141                          * In case the rpc gets retried, roll
2142                          * the uio fields changed by nfsm_uiombuf()
2143                          * back.
2144                          */
2145                         uiop->uio_offset -= len;
2146                         uiop->uio_resid += len;
2147                         uiop->uio_iov->iov_base =
2148                             (char *)uiop->uio_iov->iov_base - len;
2149                         uiop->uio_iov->iov_len += len;
2150                 }
2151                 if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2152                         error = nfscl_wcc_data(nd, vp, nap, attrflagp,
2153                             &wccflag, &tmp_off);
2154                         if (error)
2155                                 goto nfsmout;
2156                 }
2157                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2158                     (ND_NFSV4 | ND_NOMOREDATA) &&
2159                     nd->nd_repstat == NFSERR_NOTSAME && do_append) {
2160                         /*
2161                          * Verify of the file's size failed, so redo the
2162                          * write using the file's size as returned in
2163                          * the wcc attributes.
2164                          */
2165                         if (tmp_off + tsiz <= nmp->nm_maxfilesize) {
2166                                 do_append = false;
2167                                 uiop->uio_offset = tmp_off;
2168                                 m_freem(nd->nd_mrep);
2169                                 nd->nd_mrep = NULL;
2170                                 continue;
2171                         } else
2172                                 nd->nd_repstat = EFBIG;
2173                 }
2174                 if (!nd->nd_repstat) {
2175                         if (do_append) {
2176                                 /* Strip off the Write reply status. */
2177                                 do_append = false;
2178                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2179                         }
2180                         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2181                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
2182                                         + NFSX_VERF);
2183                                 rlen = fxdr_unsigned(int, *tl++);
2184                                 if (rlen == 0) {
2185                                         error = NFSERR_IO;
2186                                         goto nfsmout;
2187                                 } else if (rlen < len) {
2188                                         backup = len - rlen;
2189                                         uiop->uio_iov->iov_base =
2190                                             (char *)uiop->uio_iov->iov_base -
2191                                             backup;
2192                                         uiop->uio_iov->iov_len += backup;
2193                                         uiop->uio_offset -= backup;
2194                                         uiop->uio_resid += backup;
2195                                         len = rlen;
2196                                 }
2197                                 commit = fxdr_unsigned(int, *tl++);
2198
2199                                 /*
2200                                  * Return the lowest commitment level
2201                                  * obtained by any of the RPCs.
2202                                  */
2203                                 if (committed == NFSWRITE_FILESYNC)
2204                                         committed = commit;
2205                                 else if (committed == NFSWRITE_DATASYNC &&
2206                                         commit == NFSWRITE_UNSTABLE)
2207                                         committed = commit;
2208                                 NFSLOCKMNT(nmp);
2209                                 if (!NFSHASWRITEVERF(nmp)) {
2210                                         NFSBCOPY((caddr_t)tl,
2211                                             (caddr_t)&nmp->nm_verf[0],
2212                                             NFSX_VERF);
2213                                         NFSSETWRITEVERF(nmp);
2214                                 } else if (NFSBCMP(tl, nmp->nm_verf,
2215                                     NFSX_VERF) && *must_commit != 2) {
2216                                         *must_commit = 1;
2217                                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
2218                                 }
2219                                 NFSUNLOCKMNT(nmp);
2220                         }
2221                         if (nd->nd_flag & ND_NFSV4)
2222                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2223                         if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
2224                                 error = nfsm_loadattr(nd, nap);
2225                                 if (!error)
2226                                         *attrflagp = NFS_LATTR_NOSHRINK;
2227                         }
2228                 } else {
2229                         error = nd->nd_repstat;
2230                 }
2231                 if (error)
2232                         goto nfsmout;
2233                 NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
2234                 m_freem(nd->nd_mrep);
2235                 nd->nd_mrep = NULL;
2236                 tsiz -= len;
2237         }
2238 nfsmout:
2239         if (nd->nd_mrep != NULL)
2240                 m_freem(nd->nd_mrep);
2241         *iomode = committed;
2242         if (nd->nd_repstat && !error)
2243                 error = nd->nd_repstat;
2244         free(nd, M_TEMP);
2245         return (error);
2246 }
2247
2248 /*
2249  * Do an nfs deallocate operation.
2250  */
2251 int
2252 nfsrpc_deallocate(vnode_t vp, off_t offs, off_t len, struct nfsvattr *nap,
2253     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
2254 {
2255         int error, expireret = 0, openerr, retrycnt;
2256         uint32_t clidrev = 0;
2257         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2258         struct nfsfh *nfhp;
2259         nfsv4stateid_t stateid;
2260         void *lckp;
2261
2262         if (nmp->nm_clp != NULL)
2263                 clidrev = nmp->nm_clp->nfsc_clientidrev;
2264         retrycnt = 0;
2265         do {
2266                 lckp = NULL;
2267                 openerr = 1;
2268                 nfhp = VTONFS(vp)->n_fhp;
2269                 error = nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
2270                     NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
2271                 if (error != 0) {
2272                         /*
2273                          * No Open stateid, so try and open the file
2274                          * now.
2275                          */
2276                         openerr = nfsrpc_open(vp, FWRITE, cred, p);
2277                         if (openerr == 0)
2278                                 nfscl_getstateid(vp, nfhp->nfh_fh,
2279                                     nfhp->nfh_len, NFSV4OPEN_ACCESSWRITE, 0,
2280                                     cred, p, &stateid, &lckp);
2281                 }
2282                 error = nfsrpc_deallocaterpc(vp, offs, len, &stateid, nap,
2283                     attrflagp, cred, p);
2284                 if (error == NFSERR_STALESTATEID)
2285                         nfscl_initiate_recovery(nmp->nm_clp);
2286                 if (lckp != NULL)
2287                         nfscl_lockderef(lckp);
2288                 if (openerr == 0)
2289                         nfsrpc_close(vp, 0, p);
2290                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2291                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2292                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
2293                         (void) nfs_catnap(PZERO, error, "nfs_deallocate");
2294                 } else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
2295                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
2296                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2297                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
2298                         error = EIO;
2299                 }
2300                 retrycnt++;
2301         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2302             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2303             error == NFSERR_BADSESSION ||
2304             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
2305             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2306              expireret == 0 && clidrev != 0 && retrycnt < 4));
2307         if (error && retrycnt >= 4)
2308                 error = EIO;
2309         return (error);
2310 }
2311
2312 /*
2313  * The actual deallocate RPC.
2314  */
2315 static int
2316 nfsrpc_deallocaterpc(vnode_t vp, off_t offs, off_t len,
2317     nfsv4stateid_t *stateidp, struct nfsvattr *nap, int *attrflagp,
2318     struct ucred *cred, NFSPROC_T *p)
2319 {
2320         uint32_t *tl;
2321         struct nfsnode *np = VTONFS(vp);
2322         int error, wccflag;
2323         struct nfsrv_descript nfsd;
2324         struct nfsrv_descript *nd = &nfsd;
2325         nfsattrbit_t attrbits;
2326
2327         *attrflagp = 0;
2328         NFSCL_REQSTART(nd, NFSPROC_DEALLOCATE, vp, cred);
2329         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
2330         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
2331         txdr_hyper(offs, tl);
2332         tl += 2;
2333         txdr_hyper(len, tl);
2334         NFSWRITEGETATTR_ATTRBIT(&attrbits);
2335         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
2336         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2337         nfsrv_putattrbit(nd, &attrbits);
2338         error = nfscl_request(nd, vp, p, cred);
2339         if (error != 0)
2340                 return (error);
2341         wccflag = 0;
2342         error = nfscl_wcc_data(nd, vp, nap, attrflagp, &wccflag, NULL);
2343         if (error != 0)
2344                 goto nfsmout;
2345         if (nd->nd_repstat == 0) {
2346                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2347                 error = nfsm_loadattr(nd, nap);
2348                 if (error != 0)
2349                         goto nfsmout;
2350                 *attrflagp = NFS_LATTR_NOSHRINK;
2351         }
2352         NFSWRITERPC_SETTIME(wccflag, np, nap, 1);
2353 nfsmout:
2354         m_freem(nd->nd_mrep);
2355         if (nd->nd_repstat != 0 && error == 0)
2356                 error = nd->nd_repstat;
2357         return (error);
2358 }
2359
2360 /*
2361  * nfs mknod rpc
2362  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
2363  * mode set to specify the file type and the size field for rdev.
2364  */
2365 int
2366 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2367     u_int32_t rdev, __enum_uint8(vtype) vtyp, struct ucred *cred, NFSPROC_T *p,
2368     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2369     int *attrflagp, int *dattrflagp)
2370 {
2371         u_int32_t *tl;
2372         int error = 0;
2373         struct nfsrv_descript nfsd, *nd = &nfsd;
2374         nfsattrbit_t attrbits;
2375
2376         *nfhpp = NULL;
2377         *attrflagp = 0;
2378         *dattrflagp = 0;
2379         if (namelen > NFS_MAXNAMLEN)
2380                 return (ENAMETOOLONG);
2381         NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp, cred);
2382         if (nd->nd_flag & ND_NFSV4) {
2383                 if (vtyp == VBLK || vtyp == VCHR) {
2384                         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2385                         *tl++ = vtonfsv34_type(vtyp);
2386                         *tl++ = txdr_unsigned(NFSMAJOR(rdev));
2387                         *tl = txdr_unsigned(NFSMINOR(rdev));
2388                 } else {
2389                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2390                         *tl = vtonfsv34_type(vtyp);
2391                 }
2392         }
2393         (void) nfsm_strtom(nd, name, namelen);
2394         if (nd->nd_flag & ND_NFSV3) {
2395                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2396                 *tl = vtonfsv34_type(vtyp);
2397         }
2398         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2399                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2400         if ((nd->nd_flag & ND_NFSV3) &&
2401             (vtyp == VCHR || vtyp == VBLK)) {
2402                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2403                 *tl++ = txdr_unsigned(NFSMAJOR(rdev));
2404                 *tl = txdr_unsigned(NFSMINOR(rdev));
2405         }
2406         if (nd->nd_flag & ND_NFSV4) {
2407                 NFSGETATTR_ATTRBIT(&attrbits);
2408                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2409                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2410                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2411                 (void) nfsrv_putattrbit(nd, &attrbits);
2412         }
2413         if (nd->nd_flag & ND_NFSV2)
2414                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
2415         error = nfscl_request(nd, dvp, p, cred);
2416         if (error)
2417                 return (error);
2418         if (nd->nd_flag & ND_NFSV4)
2419                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2420         if (!nd->nd_repstat) {
2421                 if (nd->nd_flag & ND_NFSV4) {
2422                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2423                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2424                         if (error)
2425                                 goto nfsmout;
2426                 }
2427                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2428                 if (error)
2429                         goto nfsmout;
2430         }
2431         if (nd->nd_flag & ND_NFSV3)
2432                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2433         if (!error && nd->nd_repstat)
2434                 error = nd->nd_repstat;
2435 nfsmout:
2436         m_freem(nd->nd_mrep);
2437         return (error);
2438 }
2439
2440 /*
2441  * nfs file create call
2442  * Mostly just call the approriate routine. (I separated out v4, so that
2443  * error recovery wouldn't be as difficult.)
2444  */
2445 int
2446 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2447     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2448     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2449     int *attrflagp, int *dattrflagp)
2450 {
2451         int error = 0, newone, expireret = 0, retrycnt, unlocked;
2452         struct nfsclowner *owp;
2453         struct nfscldeleg *dp;
2454         struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2455         u_int32_t clidrev;
2456
2457         if (NFSHASNFSV4(nmp)) {
2458             retrycnt = 0;
2459             do {
2460                 dp = NULL;
2461                 error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
2462                     NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
2463                     NULL, 1, true);
2464                 if (error)
2465                         return (error);
2466                 if (nmp->nm_clp != NULL)
2467                         clidrev = nmp->nm_clp->nfsc_clientidrev;
2468                 else
2469                         clidrev = 0;
2470                 if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
2471                     nfs_numnfscbd == 0 || retrycnt > 0)
2472                         error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
2473                           fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2474                           attrflagp, dattrflagp, &unlocked);
2475                 else
2476                         error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
2477                           cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2478                           attrflagp, dattrflagp, &unlocked);
2479                 /*
2480                  * There is no need to invalidate cached attributes here,
2481                  * since new post-delegation issue attributes are always
2482                  * returned by nfsrpc_createv4() and these will update the
2483                  * attribute cache.
2484                  */
2485                 if (dp != NULL)
2486                         (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2487                             (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2488                 nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2489                 if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2490                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2491                     error == NFSERR_BADSESSION) {
2492                         (void) nfs_catnap(PZERO, error, "nfs_open");
2493                 } else if ((error == NFSERR_EXPIRED ||
2494                     error == NFSERR_BADSTATEID) && clidrev != 0) {
2495                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2496                         retrycnt++;
2497                 }
2498             } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2499                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2500                 error == NFSERR_BADSESSION ||
2501                 ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2502                  expireret == 0 && clidrev != 0 && retrycnt < 4));
2503             if (error && retrycnt >= 4)
2504                     error = EIO;
2505         } else {
2506                 error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2507                     fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp);
2508         }
2509         return (error);
2510 }
2511
2512 /*
2513  * The create rpc for v2 and 3.
2514  */
2515 static int
2516 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2517     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2518     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2519     int *attrflagp, int *dattrflagp)
2520 {
2521         u_int32_t *tl;
2522         int error = 0;
2523         struct nfsrv_descript nfsd, *nd = &nfsd;
2524
2525         *nfhpp = NULL;
2526         *attrflagp = 0;
2527         *dattrflagp = 0;
2528         if (namelen > NFS_MAXNAMLEN)
2529                 return (ENAMETOOLONG);
2530         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2531         (void) nfsm_strtom(nd, name, namelen);
2532         if (nd->nd_flag & ND_NFSV3) {
2533                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2534                 if (fmode & O_EXCL) {
2535                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2536                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2537                         *tl++ = cverf.lval[0];
2538                         *tl = cverf.lval[1];
2539                 } else {
2540                         *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2541                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
2542                 }
2543         } else {
2544                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2545         }
2546         error = nfscl_request(nd, dvp, p, cred);
2547         if (error)
2548                 return (error);
2549         if (nd->nd_repstat == 0) {
2550                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2551                 if (error)
2552                         goto nfsmout;
2553         }
2554         if (nd->nd_flag & ND_NFSV3)
2555                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2556         if (nd->nd_repstat != 0 && error == 0)
2557                 error = nd->nd_repstat;
2558 nfsmout:
2559         m_freem(nd->nd_mrep);
2560         return (error);
2561 }
2562
2563 static int
2564 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2565     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2566     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2567     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2568     int *dattrflagp, int *unlockedp)
2569 {
2570         u_int32_t *tl;
2571         int error = 0, deleg, newone, ret, acesize, limitby;
2572         struct nfsrv_descript nfsd, *nd = &nfsd;
2573         struct nfsclopen *op;
2574         struct nfscldeleg *dp = NULL;
2575         struct nfsnode *np;
2576         struct nfsfh *nfhp;
2577         nfsattrbit_t attrbits;
2578         nfsv4stateid_t stateid;
2579         u_int32_t rflags;
2580         struct nfsmount *nmp;
2581         struct nfsclsession *tsep;
2582
2583         nmp = VFSTONFS(dvp->v_mount);
2584         np = VTONFS(dvp);
2585         *unlockedp = 0;
2586         *nfhpp = NULL;
2587         *dpp = NULL;
2588         *attrflagp = 0;
2589         *dattrflagp = 0;
2590         if (namelen > NFS_MAXNAMLEN)
2591                 return (ENAMETOOLONG);
2592         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2593         /*
2594          * For V4, this is actually an Open op.
2595          */
2596         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2597         *tl++ = txdr_unsigned(owp->nfsow_seqid);
2598         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2599             NFSV4OPEN_ACCESSREAD);
2600         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2601         tsep = nfsmnt_mdssession(nmp);
2602         *tl++ = tsep->nfsess_clientid.lval[0];
2603         *tl = tsep->nfsess_clientid.lval[1];
2604         (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
2605         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2606         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2607         if (fmode & O_EXCL) {
2608                 if (NFSHASNFSV4N(nmp)) {
2609                         if (NFSHASSESSPERSIST(nmp)) {
2610                                 /* Use GUARDED for persistent sessions. */
2611                                 *tl = txdr_unsigned(NFSCREATE_GUARDED);
2612                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2613                         } else {
2614                                 /* Otherwise, use EXCLUSIVE4_1. */
2615                                 *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2616                                 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2617                                 *tl++ = cverf.lval[0];
2618                                 *tl = cverf.lval[1];
2619                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2620                         }
2621                 } else {
2622                         /* NFSv4.0 */
2623                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2624                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2625                         *tl++ = cverf.lval[0];
2626                         *tl = cverf.lval[1];
2627                 }
2628         } else {
2629                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2630                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2631         }
2632         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2633         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2634         (void) nfsm_strtom(nd, name, namelen);
2635         /* Get the new file's handle and attributes. */
2636         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2637         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2638         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2639         NFSGETATTR_ATTRBIT(&attrbits);
2640         (void) nfsrv_putattrbit(nd, &attrbits);
2641         /* Get the directory's post-op attributes. */
2642         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2643         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2644         (void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2645         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2646         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2647         (void) nfsrv_putattrbit(nd, &attrbits);
2648         error = nfscl_request(nd, dvp, p, cred);
2649         if (error)
2650                 return (error);
2651         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2652         if (nd->nd_repstat == 0) {
2653                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2654                     6 * NFSX_UNSIGNED);
2655                 stateid.seqid = *tl++;
2656                 stateid.other[0] = *tl++;
2657                 stateid.other[1] = *tl++;
2658                 stateid.other[2] = *tl;
2659                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2660                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2661                 if (error)
2662                         goto nfsmout;
2663                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2664                 deleg = fxdr_unsigned(int, *tl);
2665                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
2666                     deleg == NFSV4OPEN_DELEGATEWRITE) {
2667                         if (!(owp->nfsow_clp->nfsc_flags &
2668                               NFSCLFLAGS_FIRSTDELEG))
2669                                 owp->nfsow_clp->nfsc_flags |=
2670                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
2671                         dp = malloc(
2672                             sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2673                             M_NFSCLDELEG, M_WAITOK);
2674                         LIST_INIT(&dp->nfsdl_owner);
2675                         LIST_INIT(&dp->nfsdl_lock);
2676                         dp->nfsdl_clp = owp->nfsow_clp;
2677                         newnfs_copyincred(cred, &dp->nfsdl_cred);
2678                         nfscl_lockinit(&dp->nfsdl_rwlock);
2679                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2680                             NFSX_UNSIGNED);
2681                         dp->nfsdl_stateid.seqid = *tl++;
2682                         dp->nfsdl_stateid.other[0] = *tl++;
2683                         dp->nfsdl_stateid.other[1] = *tl++;
2684                         dp->nfsdl_stateid.other[2] = *tl++;
2685                         ret = fxdr_unsigned(int, *tl);
2686                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2687                                 dp->nfsdl_flags = NFSCLDL_WRITE;
2688                                 /*
2689                                  * Indicates how much the file can grow.
2690                                  */
2691                                 NFSM_DISSECT(tl, u_int32_t *,
2692                                     3 * NFSX_UNSIGNED);
2693                                 limitby = fxdr_unsigned(int, *tl++);
2694                                 switch (limitby) {
2695                                 case NFSV4OPEN_LIMITSIZE:
2696                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
2697                                         break;
2698                                 case NFSV4OPEN_LIMITBLOCKS:
2699                                         dp->nfsdl_sizelimit =
2700                                             fxdr_unsigned(u_int64_t, *tl++);
2701                                         dp->nfsdl_sizelimit *=
2702                                             fxdr_unsigned(u_int64_t, *tl);
2703                                         break;
2704                                 default:
2705                                         error = NFSERR_BADXDR;
2706                                         goto nfsmout;
2707                                 }
2708                         } else {
2709                                 dp->nfsdl_flags = NFSCLDL_READ;
2710                         }
2711                         if (ret)
2712                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
2713                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
2714                             &ret, &acesize, p);
2715                         if (error)
2716                                 goto nfsmout;
2717                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
2718                         error = NFSERR_BADXDR;
2719                         goto nfsmout;
2720                 }
2721                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2722                 if (error)
2723                         goto nfsmout;
2724                 /* Get rid of the PutFH and Getattr status values. */
2725                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2726                 /* Load the directory attributes. */
2727                 error = nfsm_loadattr(nd, dnap);
2728                 if (error)
2729                         goto nfsmout;
2730                 *dattrflagp = 1;
2731                 if (dp != NULL && *attrflagp) {
2732                         dp->nfsdl_change = nnap->na_filerev;
2733                         dp->nfsdl_modtime = nnap->na_mtime;
2734                         dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2735                 }
2736                 /*
2737                  * We can now complete the Open state.
2738                  */
2739                 nfhp = *nfhpp;
2740                 if (dp != NULL) {
2741                         dp->nfsdl_fhlen = nfhp->nfh_len;
2742                         NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2743                 }
2744                 /*
2745                  * Get an Open structure that will be
2746                  * attached to the OpenOwner, acquired already.
2747                  */
2748                 error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
2749                     (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2750                     cred, p, NULL, &op, &newone, NULL, 0, false);
2751                 if (error)
2752                         goto nfsmout;
2753                 op->nfso_stateid = stateid;
2754                 newnfs_copyincred(cred, &op->nfso_cred);
2755                 if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2756                     do {
2757                         ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2758                             nfhp->nfh_len, op, cred, p);
2759                         if (ret == NFSERR_DELAY)
2760                             (void) nfs_catnap(PZERO, ret, "nfs_create");
2761                     } while (ret == NFSERR_DELAY);
2762                     error = ret;
2763                 }
2764
2765                 /*
2766                  * If the server is handing out delegations, but we didn't
2767                  * get one because an OpenConfirm was required, try the
2768                  * Open again, to get a delegation. This is a harmless no-op,
2769                  * from a server's point of view.
2770                  */
2771                 if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2772                     (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2773                     !error && dp == NULL) {
2774                     KASSERT(!NFSHASNFSV4N(nmp),
2775                         ("nfsrpc_createv4: result confirm"));
2776                     do {
2777                         ret = nfsrpc_openrpc(VFSTONFS(dvp->v_mount), dvp,
2778                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2779                             nfhp->nfh_fh, nfhp->nfh_len,
2780                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2781                             name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2782                         if (ret == NFSERR_DELAY)
2783                             (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2784                     } while (ret == NFSERR_DELAY);
2785                     if (ret) {
2786                         if (dp != NULL) {
2787                                 free(dp, M_NFSCLDELEG);
2788                                 dp = NULL;
2789                         }
2790                         if (ret == NFSERR_STALECLIENTID ||
2791                             ret == NFSERR_STALEDONTRECOVER ||
2792                             ret == NFSERR_BADSESSION)
2793                                 error = ret;
2794                     }
2795                 }
2796                 nfscl_openrelease(nmp, op, error, newone);
2797                 *unlockedp = 1;
2798         }
2799         if (nd->nd_repstat != 0 && error == 0)
2800                 error = nd->nd_repstat;
2801         if (error == NFSERR_STALECLIENTID)
2802                 nfscl_initiate_recovery(owp->nfsow_clp);
2803 nfsmout:
2804         if (!error)
2805                 *dpp = dp;
2806         else if (dp != NULL)
2807                 free(dp, M_NFSCLDELEG);
2808         m_freem(nd->nd_mrep);
2809         return (error);
2810 }
2811
2812 /*
2813  * Nfs remove rpc
2814  */
2815 int
2816 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2817     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
2818 {
2819         u_int32_t *tl;
2820         struct nfsrv_descript nfsd, *nd = &nfsd;
2821         struct nfsnode *np;
2822         struct nfsmount *nmp;
2823         nfsv4stateid_t dstateid;
2824         int error, ret = 0, i;
2825
2826         *dattrflagp = 0;
2827         if (namelen > NFS_MAXNAMLEN)
2828                 return (ENAMETOOLONG);
2829         nmp = VFSTONFS(dvp->v_mount);
2830 tryagain:
2831         if (NFSHASNFSV4(nmp) && ret == 0) {
2832                 ret = nfscl_removedeleg(vp, p, &dstateid);
2833                 if (ret == 1) {
2834                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp, cred);
2835                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2836                             NFSX_UNSIGNED);
2837                         if (NFSHASNFSV4N(nmp))
2838                                 *tl++ = 0;
2839                         else
2840                                 *tl++ = dstateid.seqid;
2841                         *tl++ = dstateid.other[0];
2842                         *tl++ = dstateid.other[1];
2843                         *tl++ = dstateid.other[2];
2844                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2845                         np = VTONFS(dvp);
2846                         (void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
2847                             np->n_fhp->nfh_len, 0);
2848                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2849                         *tl = txdr_unsigned(NFSV4OP_REMOVE);
2850                 }
2851         } else {
2852                 ret = 0;
2853         }
2854         if (ret == 0)
2855                 NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp, cred);
2856         (void) nfsm_strtom(nd, name, namelen);
2857         error = nfscl_request(nd, dvp, p, cred);
2858         if (error)
2859                 return (error);
2860         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2861                 /* For NFSv4, parse out any Delereturn replies. */
2862                 if (ret > 0 && nd->nd_repstat != 0 &&
2863                     (nd->nd_flag & ND_NOMOREDATA)) {
2864                         /*
2865                          * If the Delegreturn failed, try again without
2866                          * it. The server will Recall, as required.
2867                          */
2868                         m_freem(nd->nd_mrep);
2869                         goto tryagain;
2870                 }
2871                 for (i = 0; i < (ret * 2); i++) {
2872                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2873                             ND_NFSV4) {
2874                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2875                             if (*(tl + 1))
2876                                 nd->nd_flag |= ND_NOMOREDATA;
2877                         }
2878                 }
2879                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2880         }
2881         if (nd->nd_repstat && !error)
2882                 error = nd->nd_repstat;
2883 nfsmout:
2884         m_freem(nd->nd_mrep);
2885         return (error);
2886 }
2887
2888 /*
2889  * Do an nfs rename rpc.
2890  */
2891 int
2892 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2893     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2894     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2895     int *fattrflagp, int *tattrflagp)
2896 {
2897         u_int32_t *tl;
2898         struct nfsrv_descript nfsd, *nd = &nfsd;
2899         struct nfsmount *nmp;
2900         struct nfsnode *np;
2901         nfsattrbit_t attrbits;
2902         nfsv4stateid_t fdstateid, tdstateid;
2903         int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2904
2905         *fattrflagp = 0;
2906         *tattrflagp = 0;
2907         nmp = VFSTONFS(fdvp->v_mount);
2908         if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2909                 return (ENAMETOOLONG);
2910 tryagain:
2911         if (NFSHASNFSV4(nmp) && ret == 0) {
2912                 ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2913                     &tdstateid, &gottd, p);
2914                 if (gotfd && gottd) {
2915                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp, cred);
2916                 } else if (gotfd) {
2917                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp, cred);
2918                 } else if (gottd) {
2919                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp, cred);
2920                 }
2921                 if (gotfd) {
2922                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2923                         if (NFSHASNFSV4N(nmp))
2924                                 *tl++ = 0;
2925                         else
2926                                 *tl++ = fdstateid.seqid;
2927                         *tl++ = fdstateid.other[0];
2928                         *tl++ = fdstateid.other[1];
2929                         *tl = fdstateid.other[2];
2930                         if (gottd) {
2931                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2932                                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2933                                 np = VTONFS(tvp);
2934                                 (void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
2935                                     np->n_fhp->nfh_len, 0);
2936                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2937                                 *tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2938                         }
2939                 }
2940                 if (gottd) {
2941                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2942                         if (NFSHASNFSV4N(nmp))
2943                                 *tl++ = 0;
2944                         else
2945                                 *tl++ = tdstateid.seqid;
2946                         *tl++ = tdstateid.other[0];
2947                         *tl++ = tdstateid.other[1];
2948                         *tl = tdstateid.other[2];
2949                 }
2950                 if (ret > 0) {
2951                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2952                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2953                         np = VTONFS(fdvp);
2954                         (void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
2955                             np->n_fhp->nfh_len, 0);
2956                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2957                         *tl = txdr_unsigned(NFSV4OP_SAVEFH);
2958                 }
2959         } else {
2960                 ret = 0;
2961         }
2962         if (ret == 0)
2963                 NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp, cred);
2964         if (nd->nd_flag & ND_NFSV4) {
2965                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2966                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2967                 NFSWCCATTR_ATTRBIT(&attrbits);
2968                 (void) nfsrv_putattrbit(nd, &attrbits);
2969                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2970                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2971                 (void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2972                     VTONFS(tdvp)->n_fhp->nfh_len, 0);
2973                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2974                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2975                 (void) nfsrv_putattrbit(nd, &attrbits);
2976                 nd->nd_flag |= ND_V4WCCATTR;
2977                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2978                 *tl = txdr_unsigned(NFSV4OP_RENAME);
2979         }
2980         (void) nfsm_strtom(nd, fnameptr, fnamelen);
2981         if (!(nd->nd_flag & ND_NFSV4))
2982                 (void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2983                         VTONFS(tdvp)->n_fhp->nfh_len, 0);
2984         (void) nfsm_strtom(nd, tnameptr, tnamelen);
2985         error = nfscl_request(nd, fdvp, p, cred);
2986         if (error)
2987                 return (error);
2988         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2989                 /* For NFSv4, parse out any Delereturn replies. */
2990                 if (ret > 0 && nd->nd_repstat != 0 &&
2991                     (nd->nd_flag & ND_NOMOREDATA)) {
2992                         /*
2993                          * If the Delegreturn failed, try again without
2994                          * it. The server will Recall, as required.
2995                          */
2996                         m_freem(nd->nd_mrep);
2997                         goto tryagain;
2998                 }
2999                 for (i = 0; i < (ret * 2); i++) {
3000                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
3001                             ND_NFSV4) {
3002                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3003                             if (*(tl + 1)) {
3004                                 if (i == 1 && ret > 1) {
3005                                     /*
3006                                      * If the Delegreturn failed, try again
3007                                      * without it. The server will Recall, as
3008                                      * required.
3009                                      * If ret > 1, the second iteration of this
3010                                      * loop is the second DelegReturn result.
3011                                      */
3012                                     m_freem(nd->nd_mrep);
3013                                     goto tryagain;
3014                                 } else {
3015                                     nd->nd_flag |= ND_NOMOREDATA;
3016                                 }
3017                             }
3018                         }
3019                 }
3020                 /* Now, the first wcc attribute reply. */
3021                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
3022                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3023                         if (*(tl + 1))
3024                                 nd->nd_flag |= ND_NOMOREDATA;
3025                 }
3026                 error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, NULL);
3027                 /* and the second wcc attribute reply. */
3028                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
3029                     !error) {
3030                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3031                         if (*(tl + 1))
3032                                 nd->nd_flag |= ND_NOMOREDATA;
3033                 }
3034                 if (!error)
3035                         error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
3036                             NULL, NULL);
3037         }
3038         if (nd->nd_repstat && !error)
3039                 error = nd->nd_repstat;
3040 nfsmout:
3041         m_freem(nd->nd_mrep);
3042         return (error);
3043 }
3044
3045 /*
3046  * nfs hard link create rpc
3047  */
3048 int
3049 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
3050     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3051     struct nfsvattr *nap, int *attrflagp, int *dattrflagp)
3052 {
3053         u_int32_t *tl;
3054         struct nfsrv_descript nfsd, *nd = &nfsd;
3055         nfsattrbit_t attrbits;
3056         int error = 0;
3057
3058         *attrflagp = 0;
3059         *dattrflagp = 0;
3060         if (namelen > NFS_MAXNAMLEN)
3061                 return (ENAMETOOLONG);
3062         NFSCL_REQSTART(nd, NFSPROC_LINK, vp, cred);
3063         if (nd->nd_flag & ND_NFSV4) {
3064                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3065                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
3066         }
3067         (void)nfsm_fhtom(VFSTONFS(dvp->v_mount), nd, VTONFS(dvp)->n_fhp->nfh_fh,
3068                 VTONFS(dvp)->n_fhp->nfh_len, 0);
3069         if (nd->nd_flag & ND_NFSV4) {
3070                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3071                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3072                 NFSWCCATTR_ATTRBIT(&attrbits);
3073                 (void) nfsrv_putattrbit(nd, &attrbits);
3074                 nd->nd_flag |= ND_V4WCCATTR;
3075                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3076                 *tl = txdr_unsigned(NFSV4OP_LINK);
3077         }
3078         (void) nfsm_strtom(nd, name, namelen);
3079         error = nfscl_request(nd, vp, p, cred);
3080         if (error)
3081                 return (error);
3082         if (nd->nd_flag & ND_NFSV3) {
3083                 error = nfscl_postop_attr(nd, nap, attrflagp);
3084                 if (!error)
3085                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
3086                             NULL, NULL);
3087         } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
3088                 /*
3089                  * First, parse out the PutFH and Getattr result.
3090                  */
3091                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3092                 if (!(*(tl + 1)))
3093                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3094                 if (*(tl + 1))
3095                         nd->nd_flag |= ND_NOMOREDATA;
3096                 /*
3097                  * Get the pre-op attributes.
3098                  */
3099                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3100         }
3101         if (nd->nd_repstat && !error)
3102                 error = nd->nd_repstat;
3103 nfsmout:
3104         m_freem(nd->nd_mrep);
3105         return (error);
3106 }
3107
3108 /*
3109  * nfs symbolic link create rpc
3110  */
3111 int
3112 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, const char *target,
3113     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3114     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
3115     int *dattrflagp)
3116 {
3117         u_int32_t *tl;
3118         struct nfsrv_descript nfsd, *nd = &nfsd;
3119         struct nfsmount *nmp;
3120         int slen, error = 0;
3121
3122         *nfhpp = NULL;
3123         *attrflagp = 0;
3124         *dattrflagp = 0;
3125         nmp = VFSTONFS(dvp->v_mount);
3126         slen = strlen(target);
3127         if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
3128                 return (ENAMETOOLONG);
3129         NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp, cred);
3130         if (nd->nd_flag & ND_NFSV4) {
3131                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3132                 *tl = txdr_unsigned(NFLNK);
3133                 (void) nfsm_strtom(nd, target, slen);
3134         }
3135         (void) nfsm_strtom(nd, name, namelen);
3136         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
3137                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
3138         if (!(nd->nd_flag & ND_NFSV4))
3139                 (void) nfsm_strtom(nd, target, slen);
3140         if (nd->nd_flag & ND_NFSV2)
3141                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
3142         error = nfscl_request(nd, dvp, p, cred);
3143         if (error)
3144                 return (error);
3145         if (nd->nd_flag & ND_NFSV4)
3146                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3147         if ((nd->nd_flag & ND_NFSV3) && !error) {
3148                 if (!nd->nd_repstat)
3149                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
3150                 if (!error)
3151                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
3152                             NULL, NULL);
3153         }
3154         if (nd->nd_repstat && !error)
3155                 error = nd->nd_repstat;
3156         m_freem(nd->nd_mrep);
3157         /*
3158          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3159          * Only do this if vfs.nfs.ignore_eexist is set.
3160          * Never do this for NFSv4.1 or later minor versions, since sessions
3161          * should guarantee "exactly once" RPC semantics.
3162          */
3163         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3164             nmp->nm_minorvers == 0))
3165                 error = 0;
3166         return (error);
3167 }
3168
3169 /*
3170  * nfs make dir rpc
3171  */
3172 int
3173 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
3174     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3175     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
3176     int *dattrflagp)
3177 {
3178         u_int32_t *tl;
3179         struct nfsrv_descript nfsd, *nd = &nfsd;
3180         nfsattrbit_t attrbits;
3181         int error = 0;
3182         struct nfsfh *fhp;
3183         struct nfsmount *nmp;
3184
3185         *nfhpp = NULL;
3186         *attrflagp = 0;
3187         *dattrflagp = 0;
3188         nmp = VFSTONFS(dvp->v_mount);
3189         fhp = VTONFS(dvp)->n_fhp;
3190         if (namelen > NFS_MAXNAMLEN)
3191                 return (ENAMETOOLONG);
3192         NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp, cred);
3193         if (nd->nd_flag & ND_NFSV4) {
3194                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3195                 *tl = txdr_unsigned(NFDIR);
3196         }
3197         (void) nfsm_strtom(nd, name, namelen);
3198         nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
3199         if (nd->nd_flag & ND_NFSV4) {
3200                 NFSGETATTR_ATTRBIT(&attrbits);
3201                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3202                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
3203                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3204                 (void) nfsrv_putattrbit(nd, &attrbits);
3205                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3206                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
3207                 (void)nfsm_fhtom(nmp, nd, fhp->nfh_fh, fhp->nfh_len, 0);
3208                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3209                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3210                 (void) nfsrv_putattrbit(nd, &attrbits);
3211         }
3212         error = nfscl_request(nd, dvp, p, cred);
3213         if (error)
3214                 return (error);
3215         if (nd->nd_flag & ND_NFSV4)
3216                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3217         if (!nd->nd_repstat && !error) {
3218                 if (nd->nd_flag & ND_NFSV4) {
3219                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3220                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
3221                 }
3222                 if (!error)
3223                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
3224                 if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
3225                         /* Get rid of the PutFH and Getattr status values. */
3226                         NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3227                         /* Load the directory attributes. */
3228                         error = nfsm_loadattr(nd, dnap);
3229                         if (error == 0)
3230                                 *dattrflagp = 1;
3231                 }
3232         }
3233         if ((nd->nd_flag & ND_NFSV3) && !error)
3234                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3235         if (nd->nd_repstat && !error)
3236                 error = nd->nd_repstat;
3237 nfsmout:
3238         m_freem(nd->nd_mrep);
3239         /*
3240          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3241          * Only do this if vfs.nfs.ignore_eexist is set.
3242          * Never do this for NFSv4.1 or later minor versions, since sessions
3243          * should guarantee "exactly once" RPC semantics.
3244          */
3245         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3246             nmp->nm_minorvers == 0))
3247                 error = 0;
3248         return (error);
3249 }
3250
3251 /*
3252  * nfs remove directory call
3253  */
3254 int
3255 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
3256     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
3257 {
3258         struct nfsrv_descript nfsd, *nd = &nfsd;
3259         int error = 0;
3260
3261         *dattrflagp = 0;
3262         if (namelen > NFS_MAXNAMLEN)
3263                 return (ENAMETOOLONG);
3264         NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp, cred);
3265         (void) nfsm_strtom(nd, name, namelen);
3266         error = nfscl_request(nd, dvp, p, cred);
3267         if (error)
3268                 return (error);
3269         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
3270                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3271         if (nd->nd_repstat && !error)
3272                 error = nd->nd_repstat;
3273         m_freem(nd->nd_mrep);
3274         /*
3275          * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
3276          */
3277         if (error == ENOENT)
3278                 error = 0;
3279         return (error);
3280 }
3281
3282 /*
3283  * Readdir rpc.
3284  * Always returns with either uio_resid unchanged, if you are at the
3285  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
3286  * filled in.
3287  * I felt this would allow caching of directory blocks more easily
3288  * than returning a partially filled block.
3289  * Directory offset cookies:
3290  * Oh my, what to do with them...
3291  * I can think of three ways to deal with them:
3292  * 1 - have the layer above these RPCs maintain a map between logical
3293  *     directory byte offsets and the NFS directory offset cookies
3294  * 2 - pass the opaque directory offset cookies up into userland
3295  *     and let the libc functions deal with them, via the system call
3296  * 3 - return them to userland in the "struct dirent", so future versions
3297  *     of libc can use them and do whatever is necessary to make things work
3298  *     above these rpc calls, in the meantime
3299  * For now, I do #3 by "hiding" the directory offset cookies after the
3300  * d_name field in struct dirent. This is space inside d_reclen that
3301  * will be ignored by anything that doesn't know about them.
3302  * The directory offset cookies are filled in as the last 8 bytes of
3303  * each directory entry, after d_name. Someday, the userland libc
3304  * functions may be able to use these. In the meantime, it satisfies
3305  * OpenBSD's requirements for cookies being returned.
3306  * It expects the directory offset cookie for the read to be in uio_offset
3307  * and returns the one for the next entry after this directory block in
3308  * there, as well.
3309  */
/*
 * Read directory entries for vp with Readdir RPCs and build them, in
 * "struct dirent" form, directly in the caller's uio buffer.
 *
 * vp         - directory vnode; nmp and dnp are derived from it.
 * uiop       - must have exactly one iovec and a uio_resid that is a
 *              multiple of DIRBLKSIZ (asserted below). iov_base, iov_len,
 *              uio_resid and uio_offset are advanced as entries are stored.
 * cookiep    - in: directory offset cookie to start reading at.
 *              out: cookie of the last entry stored in the buffer.
 * cred, p    - handed to nfscl_request() and nfsv4_loadattr().
 * nap, attrflagp - handed to nfscl_postop_attr(); *attrflagp is cleared
 *              on entry and again before each Readdir RPC.
 * eofp       - optional; when non-NULL it is set according to the rules
 *              in the comment near the end of the function.
 *
 * Returns 0 on success. Otherwise returns the error from nfscl_request()
 * or a reply-parsing helper, the reply status nd_repstat, EBADRPC for an
 * out-of-range entry name length, or EPERM for an out-of-range file handle
 * length in the Lookupp reply.
 */
3310 int
3311 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3312     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3313     int *eofp)
3314 {
3315         int len, left;
3316         struct dirent *dp = NULL;       /* most recent entry stored */
3317         u_int32_t *tl;
3318         nfsquad_t cookie, ncookie;
3319         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3320         struct nfsnode *dnp = VTONFS(vp);
3321         struct nfsvattr nfsva;
3322         struct nfsrv_descript nfsd, *nd = &nfsd;
3323         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3324         int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
3325         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3326         char *cp;
3327         nfsattrbit_t attrbits, dattrbits;
3328         u_int32_t rderr, *tl2 = NULL;   /* tl2: cookie slot of entry dp */
3329         size_t tresid;
3330
3331         KASSERT(uiop->uio_iovcnt == 1 &&
3332             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3333             ("nfs readdirrpc bad uio"));
3334         ncookie.lval[0] = ncookie.lval[1] = 0;
3335         /*
3336          * There is no point in reading a lot more than uio_resid, however
3337          * adding one additional DIRBLKSIZ makes sense. Since uio_resid
3338          * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
3339          * will never make readsize > nm_readdirsize.
3340          */
3341         readsize = nmp->nm_readdirsize;
3342         if (readsize > uiop->uio_resid)
3343                 readsize = uiop->uio_resid + DIRBLKSIZ;
3344
3345         *attrflagp = 0;
3346         if (eofp)
3347                 *eofp = 0;
3348         tresid = uiop->uio_resid;       /* used to detect "nothing stored" */
3349         cookie.lval[0] = cookiep->nfsuquad[0];
3350         cookie.lval[1] = cookiep->nfsuquad[1];
3351         nd->nd_mrep = NULL;     /* so nfsmout only frees a live reply */
3352
3353         /*
3354          * For NFSv4, first create the "." and ".." entries.
3355          */
3356         if (NFSHASNFSV4(nmp)) {
3357                 reqsize = 6 * NFSX_UNSIGNED;    /* cookie, verifier, 2 counts */
3358                 NFSGETATTR_ATTRBIT(&dattrbits); /* for the trailing Getattr op */
3359                 NFSZERO_ATTRBIT(&attrbits);
3360                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3361                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
3362                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3363                     NFSATTRBIT_MOUNTEDONFILEID)) {
3364                         NFSSETBIT_ATTRBIT(&attrbits,
3365                             NFSATTRBIT_MOUNTEDONFILEID);
3366                         gotmnton = 1;
3367                 } else {
3368                         /*
3369                          * Must fake it. Use the fileno, except when the
3370                          * fsid is != to that of the directory. For that
3371                          * case, generate a fake fileno that is not the same.
3372                          */
3373                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3374                         gotmnton = 0;
3375                 }
3376
3377                 /*
3378                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3379                  * This is only done when reading from the start of the
3380                  * directory (uio_offset == 0). A Lookupp RPC with Getfh and
                  * Getattr appended supplies the fileids for the two entries.
                  */
3380                 if (uiop->uio_offset == 0) {
3381                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
3382                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3383                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
3384                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3385                         (void) nfsrv_putattrbit(nd, &attrbits);
3386                         error = nfscl_request(nd, vp, p, cred);
3387                         if (error)
3388                             return (error);
3389                         dotfileid = 0;  /* Fake out the compiler. */
                             /*
                              * The first attributes in the reply supply the
                              * fileid that is used for ".".
                              */
3390                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3391                             error = nfsm_loadattr(nd, &nfsva);
3392                             if (error != 0)
3393                                 goto nfsmout;
3394                             dotfileid = nfsva.na_fileid;
3395                         }
3396                         if (nd->nd_repstat == 0) {
                                 /*
                                  * The fifth word dissected here is used as the
                                  * length of the file handle returned for Getfh.
                                  * The handle itself is not needed, so it is
                                  * stepped over to reach the attributes.
                                  */
3397                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3398                             len = fxdr_unsigned(int, *(tl + 4));
3399                             if (len > 0 && len <= NFSX_V4FHMAX)
3400                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3401                             else
3402                                 error = EPERM;
3403                             if (!error) {
3404                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
                                     /* UINT64_MAX marks "no mounted-on fileid". */
3405                                 nfsva.na_mntonfileno = UINT64_MAX;
3406                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3407                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3408                                     NULL, NULL, NULL, p, cred);
3409                                 if (error) {
                                         /*
                                          * NOTE(review): error stays set, so the
                                          * check after m_freem() below returns it
                                          * and this value is never stored.
                                          */
3410                                     dotdotfileid = dotfileid;
3411                                 } else if (gotmnton) {
3412                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3413                                         dotdotfileid = nfsva.na_mntonfileno;
3414                                     else
3415                                         dotdotfileid = nfsva.na_fileid;
3416                                 } else if (nfsva.na_filesid[0] ==
3417                                     dnp->n_vattr.na_filesid[0] &&
3418                                     nfsva.na_filesid[1] ==
3419                                     dnp->n_vattr.na_filesid[1]) {
3420                                     dotdotfileid = nfsva.na_fileid;
3421                                 } else {
                                         /*
                                          * Different fsid: count down from
                                          * UINT64_MAX to a fake fileno that
                                          * differs from the real fileid.
                                          */
3422                                     do {
3423                                         fakefileno--;
3424                                     } while (fakefileno ==
3425                                         nfsva.na_fileid);
3426                                     dotdotfileid = fakefileno;
3427                                 }
3428                             }
3429                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3430                             /*
3431                              * Lookupp returns NFSERR_NOENT when we are
3432                              * at the root, so just use the current dir.
3433                              */
3434                             nd->nd_repstat = 0;
3435                             dotdotfileid = dotfileid;
3436                         } else {
3437                             error = nd->nd_repstat;
3438                         }
                             /*
                              * The reply is freed before the error check, so
                              * the return below does not leak it.
                              */
3439                         m_freem(nd->nd_mrep);
3440                         if (error)
3441                             return (error);
3442                         nd->nd_mrep = NULL;
                             /* Build the "." entry at the start of the buffer. */
3443                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3444                         dp->d_pad0 = dp->d_pad1 = 0;
3445                         dp->d_off = 0;
3446                         dp->d_type = DT_DIR;
3447                         dp->d_fileno = dotfileid;
3448                         dp->d_namlen = 1;
3449                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3450                         dp->d_name[0] = '.';
3451                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3452                         /*
3453                          * Just make these offset cookie 0.
3454                          * The cookie is stored after the 8 byte padded name.
3455                          */
3455                         tl = (u_int32_t *)&dp->d_name[8];
3456                         *tl++ = 0;
3457                         *tl = 0;
3458                         blksiz += dp->d_reclen;
3459                         uiop->uio_resid -= dp->d_reclen;
3460                         uiop->uio_offset += dp->d_reclen;
3461                         uiop->uio_iov->iov_base =
3462                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3463                         uiop->uio_iov->iov_len -= dp->d_reclen;
                             /* Build the ".." entry right after it. */
3464                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3465                         dp->d_pad0 = dp->d_pad1 = 0;
3466                         dp->d_off = 0;
3467                         dp->d_type = DT_DIR;
3468                         dp->d_fileno = dotdotfileid;
3469                         dp->d_namlen = 2;
3470                         *((uint64_t *)dp->d_name) = 0;
3471                         dp->d_name[0] = '.';
3472                         dp->d_name[1] = '.';
3473                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3474                         /*
3475                          * Just make these offset cookie 0.
3476                          */
3477                         tl = (u_int32_t *)&dp->d_name[8];
3478                         *tl++ = 0;
3479                         *tl = 0;
3480                         blksiz += dp->d_reclen;
3481                         uiop->uio_resid -= dp->d_reclen;
3482                         uiop->uio_offset += dp->d_reclen;
3483                         uiop->uio_iov->iov_base =
3484                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3485                         uiop->uio_iov->iov_len -= dp->d_reclen;
3486                 }
                     /*
                      * Also request RDATTRERROR for the Readdir entries;
                      * presumably this is what nfsv4_loadattr() hands back
                      * in rderr below - TODO confirm.
                      */
3487                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
3488         } else {
3489                 reqsize = 5 * NFSX_UNSIGNED;    /* cookie, verifier, count */
3490         }
3491
3492         /*
3493          * Loop around doing readdir rpc's of size readsize.
3494          * The stopping criteria is EOF or buffer full.
3495          */
3496         while (more_dirs && bigenough) {
3497                 *attrflagp = 0;
3498                 NFSCL_REQSTART(nd, NFSPROC_READDIR, vp, cred);
3499                 if (nd->nd_flag & ND_NFSV2) {
                             /* V2 sends only a 32bit cookie and the count. */
3500                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3501                         *tl++ = cookie.lval[1];
3502                         *tl = txdr_unsigned(readsize);
3503                 } else {
3504                         NFSM_BUILD(tl, u_int32_t *, reqsize);
3505                         *tl++ = cookie.lval[0];
3506                         *tl++ = cookie.lval[1];
                             /*
                              * A zero cookie is sent with a zero verifier;
                              * otherwise the verifier saved in the nfsnode
                              * from an earlier reply is sent.
                              */
3507                         if (cookie.qval == 0) {
3508                                 *tl++ = 0;
3509                                 *tl++ = 0;
3510                         } else {
3511                                 NFSLOCKNODE(dnp);
3512                                 *tl++ = dnp->n_cookieverf.nfsuquad[0];
3513                                 *tl++ = dnp->n_cookieverf.nfsuquad[1];
3514                                 NFSUNLOCKNODE(dnp);
3515                         }
3516                         if (nd->nd_flag & ND_NFSV4) {
                                     /*
                                      * V4 takes two counts (both readsize here),
                                      * then the per-entry attribute request and
                                      * a Getattr op using dattrbits.
                                      */
3517                                 *tl++ = txdr_unsigned(readsize);
3518                                 *tl = txdr_unsigned(readsize);
3519                                 (void) nfsrv_putattrbit(nd, &attrbits);
3520                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3521                                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3522                                 (void) nfsrv_putattrbit(nd, &dattrbits);
3523                         } else {
3524                                 *tl = txdr_unsigned(readsize);
3525                         }
3526                 }
3527                 error = nfscl_request(nd, vp, p, cred);
3528                 if (error)
3529                         return (error);
3530                 if (!(nd->nd_flag & ND_NFSV2)) {
3531                         if (nd->nd_flag & ND_NFSV3)
3532                                 error = nfscl_postop_attr(nd, nap, attrflagp);
                             /* Save the verifier from the reply in the nfsnode. */
3533                         if (!nd->nd_repstat && !error) {
3534                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3535                                 NFSLOCKNODE(dnp);
3536                                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3537                                 dnp->n_cookieverf.nfsuquad[1] = *tl;
3538                                 NFSUNLOCKNODE(dnp);
3539                         }
3540                 }
3541                 if (nd->nd_repstat || error) {
3542                         if (!error)
3543                                 error = nd->nd_repstat;
3544                         goto nfsmout;
3545                 }
                     /*
                      * more_dirs is the "an entry follows" word. If the reply
                      * holds no entries at all, tryformoredirs is cleared so
                      * that no further RPC is attempted.
                      */
3546                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3547                 more_dirs = fxdr_unsigned(int, *tl);
3548                 if (!more_dirs)
3549                         tryformoredirs = 0;
3550
3551                 /* loop through the dir entries, doctoring them to 4bsd form */
3552                 while (more_dirs && bigenough) {
                             /*
                              * Entry header: V4 has the cookie before the name
                              * length, V3 and V2 have the fileid there and the
                              * cookie after the name.
                              */
3553                         if (nd->nd_flag & ND_NFSV4) {
3554                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3555                                 ncookie.lval[0] = *tl++;
3556                                 ncookie.lval[1] = *tl++;
3557                                 len = fxdr_unsigned(int, *tl);
3558                         } else if (nd->nd_flag & ND_NFSV3) {
3559                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3560                                 nfsva.na_fileid = fxdr_hyper(tl);
3561                                 tl += 2;
3562                                 len = fxdr_unsigned(int, *tl);
3563                         } else {
3564                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3565                                 nfsva.na_fileid = fxdr_unsigned(uint64_t,
3566                                     *tl++);
3567                                 len = fxdr_unsigned(int, *tl);
3568                         }
3569                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3570                                 error = EBADRPC;
3571                                 goto nfsmout;
3572                         }
                             /* tlen: name length padded to a multiple of 8. */
3573                         tlen = roundup2(len, 8);
3574                         if (tlen == len)
3575                                 tlen += 8;  /* To ensure null termination. */
                             /*
                              * If the entry will not fit in what is left of the
                              * current DIRBLKSIZ block, zero the remainder and
                              * extend the previous entry's d_reclen over it.
                              */
3576                         left = DIRBLKSIZ - blksiz;
3577                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3578                                 NFSBZERO(uiop->uio_iov->iov_base, left);
3579                                 dp->d_reclen += left;
3580                                 uiop->uio_iov->iov_base =
3581                                     (char *)uiop->uio_iov->iov_base + left;
3582                                 uiop->uio_iov->iov_len -= left;
3583                                 uiop->uio_resid -= left;
3584                                 uiop->uio_offset += left;
3585                                 blksiz = 0;
3586                         }
                             /*
                              * Once the caller's buffer is too small, this entry
                              * is still parsed (to consume the reply) but is not
                              * stored, and both loops then terminate.
                              */
3587                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3588                             uiop->uio_resid)
3589                                 bigenough = 0;
3590                         if (bigenough) {
3591                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3592                                 dp->d_pad0 = dp->d_pad1 = 0;
3593                                 dp->d_off = 0;
3594                                 dp->d_namlen = len;
3595                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3596                                     NFSX_HYPER;
3597                                 dp->d_type = DT_UNKNOWN;
3598                                 blksiz += dp->d_reclen;
3599                                 if (blksiz == DIRBLKSIZ)
3600                                         blksiz = 0;
                                     /* Step past the fixed part of the dirent. */
3601                                 uiop->uio_resid -= DIRHDSIZ;
3602                                 uiop->uio_offset += DIRHDSIZ;
3603                                 uiop->uio_iov->iov_base =
3604                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3605                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
                                     /*
                                      * Copy the name from the reply into d_name.
                                      * The code below treats iov_base as pointing
                                      * just past the name afterwards, so
                                      * nfsm_mbufuio() presumably advances the uio.
                                      */
3606                                 error = nfsm_mbufuio(nd, uiop, len);
3607                                 if (error)
3608                                         goto nfsmout;
3609                                 cp = uiop->uio_iov->iov_base;
3610                                 tlen -= len;    /* now the pad byte count */
3611                                 NFSBZERO(cp, tlen);
3612                                 cp += tlen;     /* points to cookie storage */
3613                                 tl2 = (u_int32_t *)cp;
3614                                 uiop->uio_iov->iov_base =
3615                                     (char *)uiop->uio_iov->iov_base + tlen +
3616                                     NFSX_HYPER;
3617                                 uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3618                                 uiop->uio_resid -= tlen + NFSX_HYPER;
3619                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3620                         } else {
                                     /* Not storing it: skip the name in the reply. */
3621                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3622                                 if (error)
3623                                         goto nfsmout;
3624                         }
                             /*
                              * Entry trailer: V4 has the attributes, V3 and V2
                              * have the cookie. In every case tl is left at the
                              * "another entry follows" word read at the bottom
                              * of this loop.
                              */
3625                         if (nd->nd_flag & ND_NFSV4) {
3626                                 rderr = 0;
3627                                 nfsva.na_mntonfileno = UINT64_MAX;
3628                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3629                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3630                                     NULL, NULL, &rderr, p, cred);
3631                                 if (error)
3632                                         goto nfsmout;
3633                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3634                         } else if (nd->nd_flag & ND_NFSV3) {
3635                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3636                                 ncookie.lval[0] = *tl++;
3637                                 ncookie.lval[1] = *tl++;
3638                         } else {
3639                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3640                                 ncookie.lval[0] = 0;
3641                                 ncookie.lval[1] = *tl++;
3642                         }
                             /*
                              * Finish the stored entry: set d_fileno (and d_type
                              * for V4), then record the cookie in the entry's
                              * hidden slot, in *cookiep and in cookie, which is
                              * what the next RPC is issued with.
                              */
3643                         if (bigenough) {
3644                             if (nd->nd_flag & ND_NFSV4) {
3645                                 if (rderr) {
3646                                     dp->d_fileno = 0;
3647                                 } else {
                                         /* Same fileno rules as for ".." above. */
3648                                     if (gotmnton) {
3649                                         if (nfsva.na_mntonfileno != UINT64_MAX)
3650                                             dp->d_fileno = nfsva.na_mntonfileno;
3651                                         else
3652                                             dp->d_fileno = nfsva.na_fileid;
3653                                     } else if (nfsva.na_filesid[0] ==
3654                                         dnp->n_vattr.na_filesid[0] &&
3655                                         nfsva.na_filesid[1] ==
3656                                         dnp->n_vattr.na_filesid[1]) {
3657                                         dp->d_fileno = nfsva.na_fileid;
3658                                     } else {
3659                                         do {
3660                                             fakefileno--;
3661                                         } while (fakefileno ==
3662                                             nfsva.na_fileid);
3663                                         dp->d_fileno = fakefileno;
3664                                     }
3665                                     dp->d_type = vtonfs_dtype(nfsva.na_type);
3666                                 }
3667                             } else {
3668                                 dp->d_fileno = nfsva.na_fileid;
3669                             }
3670                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3671                                 ncookie.lval[0];
3672                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3673                                 ncookie.lval[1];
3674                         }
3675                         more_dirs = fxdr_unsigned(int, *tl);
3676                 }
3677                 /*
3678                  * If at end of rpc data, get the eof boolean
3679                  * Another RPC is only attempted when this reply held at
3680                  * least one entry (tryformoredirs) and eof is not set.
                  */
3680                 if (!more_dirs) {
3681                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3682                         eof = fxdr_unsigned(int, *tl);
3683                         if (tryformoredirs)
3684                                 more_dirs = !eof;
3685                         if (nd->nd_flag & ND_NFSV4) {
3686                                 error = nfscl_postop_attr(nd, nap, attrflagp);
3687                                 if (error)
3688                                         goto nfsmout;
3689                         }
3690                 }
3691                 m_freem(nd->nd_mrep);
3692                 nd->nd_mrep = NULL;
3693         }
3694         /*
3695          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3696          * by increasing d_reclen for the last record.
3697          */
3698         if (blksiz > 0) {
3699                 left = DIRBLKSIZ - blksiz;
3700                 NFSBZERO(uiop->uio_iov->iov_base, left);
3701                 dp->d_reclen += left;
3702                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3703                     left;
3704                 uiop->uio_iov->iov_len -= left;
3705                 uiop->uio_resid -= left;
3706                 uiop->uio_offset += left;
3707         }
3708
3709         /*
3710          * If returning no data, assume end of file.
3711          * If not bigenough, return not end of file, since you aren't
3712          *    returning all the data
3713          * Otherwise, return the eof flag from the server.
3714          */
3715         if (eofp) {
3716                 if (tresid == ((size_t)(uiop->uio_resid)))
3717                         *eofp = 1;
3718                 else if (!bigenough)
3719                         *eofp = 0;
3720                 else
3721                         *eofp = eof;
3722         }
3723
3724         /*
3725          * Add extra empty records to any remaining DIRBLKSIZ chunks.
3726          * Nothing is added when no data at all was stored. Each filler
          * record is zeroed, spans a whole DIRBLKSIZ and carries the
          * current cookie.
          * NOTE(review): the cookie is written at d_name[4] here, whereas
          * the entries built above keep it after the 8 byte padded name;
          * confirm what consumers of these records expect.
          */
3727         while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3728                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3729                 NFSBZERO(dp, DIRBLKSIZ);
3730                 dp->d_type = DT_UNKNOWN;
3731                 tl = (u_int32_t *)&dp->d_name[4];
3732                 *tl++ = cookie.lval[0];
3733                 *tl = cookie.lval[1];
3734                 dp->d_reclen = DIRBLKSIZ;
3735                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3736                     DIRBLKSIZ;
3737                 uiop->uio_iov->iov_len -= DIRBLKSIZ;
3738                 uiop->uio_resid -= DIRBLKSIZ;
3739                 uiop->uio_offset += DIRBLKSIZ;
3740         }
3741
     /*
      * Common exit: free any reply that is still held and return error,
      * which is 0 when this point is reached by falling through.
      */
3742 nfsmout:
3743         if (nd->nd_mrep != NULL)
3744                 m_freem(nd->nd_mrep);
3745         return (error);
3746 }
3747
3748 /*
3749  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3750  * (Also used for NFS V4 when mount flag set.)
3751  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3752  */
3753 int
3754 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3755     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3756     int *eofp)
3757 {
3758         int len, left;
3759         struct dirent *dp = NULL;
3760         u_int32_t *tl;
3761         vnode_t newvp = NULLVP;
3762         struct nfsrv_descript nfsd, *nd = &nfsd;
3763         struct nameidata nami, *ndp = &nami;
3764         struct componentname *cnp = &ndp->ni_cnd;
3765         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3766         struct nfsnode *dnp = VTONFS(vp), *np;
3767         struct nfsvattr nfsva;
3768         struct nfsfh *nfhp;
3769         nfsquad_t cookie, ncookie;
3770         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3771         int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3772         int isdotdot = 0, unlocknewvp = 0;
3773         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3774         u_int64_t fileno = 0;
3775         char *cp;
3776         nfsattrbit_t attrbits, dattrbits;
3777         size_t tresid;
3778         u_int32_t *tl2 = NULL, rderr;
3779         struct timespec dctime, ts;
3780         bool attr_ok;
3781
3782         KASSERT(uiop->uio_iovcnt == 1 &&
3783             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3784             ("nfs readdirplusrpc bad uio"));
3785         ncookie.lval[0] = ncookie.lval[1] = 0;
3786         timespecclear(&dctime);
3787         *attrflagp = 0;
3788         if (eofp != NULL)
3789                 *eofp = 0;
3790         ndp->ni_dvp = vp;
3791         nd->nd_mrep = NULL;
3792         cookie.lval[0] = cookiep->nfsuquad[0];
3793         cookie.lval[1] = cookiep->nfsuquad[1];
3794         tresid = uiop->uio_resid;
3795
3796         /*
3797          * For NFSv4, first create the "." and ".." entries.
3798          */
3799         if (NFSHASNFSV4(nmp)) {
3800                 NFSGETATTR_ATTRBIT(&dattrbits);
3801                 NFSZERO_ATTRBIT(&attrbits);
3802                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3803                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3804                     NFSATTRBIT_MOUNTEDONFILEID)) {
3805                         NFSSETBIT_ATTRBIT(&attrbits,
3806                             NFSATTRBIT_MOUNTEDONFILEID);
3807                         gotmnton = 1;
3808                 } else {
3809                         /*
3810                          * Must fake it. Use the fileno, except when the
3811                          * fsid is != to that of the directory. For that
3812                          * case, generate a fake fileno that is not the same.
3813                          */
3814                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3815                         gotmnton = 0;
3816                 }
3817
3818                 /*
3819                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3820                  */
3821                 if (uiop->uio_offset == 0) {
3822                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
3823                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3824                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
3825                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3826                         (void) nfsrv_putattrbit(nd, &attrbits);
3827                         error = nfscl_request(nd, vp, p, cred);
3828                         if (error)
3829                             return (error);
3830                         dotfileid = 0;  /* Fake out the compiler. */
3831                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3832                             error = nfsm_loadattr(nd, &nfsva);
3833                             if (error != 0)
3834                                 goto nfsmout;
3835                             dctime = nfsva.na_ctime;
3836                             dotfileid = nfsva.na_fileid;
3837                         }
3838                         if (nd->nd_repstat == 0) {
3839                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3840                             len = fxdr_unsigned(int, *(tl + 4));
3841                             if (len > 0 && len <= NFSX_V4FHMAX)
3842                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3843                             else
3844                                 error = EPERM;
3845                             if (!error) {
3846                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3847                                 nfsva.na_mntonfileno = UINT64_MAX;
3848                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3849                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3850                                     NULL, NULL, NULL, p, cred);
3851                                 if (error) {
3852                                     dotdotfileid = dotfileid;
3853                                 } else if (gotmnton) {
3854                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3855                                         dotdotfileid = nfsva.na_mntonfileno;
3856                                     else
3857                                         dotdotfileid = nfsva.na_fileid;
3858                                 } else if (nfsva.na_filesid[0] ==
3859                                     dnp->n_vattr.na_filesid[0] &&
3860                                     nfsva.na_filesid[1] ==
3861                                     dnp->n_vattr.na_filesid[1]) {
3862                                     dotdotfileid = nfsva.na_fileid;
3863                                 } else {
3864                                     do {
3865                                         fakefileno--;
3866                                     } while (fakefileno ==
3867                                         nfsva.na_fileid);
3868                                     dotdotfileid = fakefileno;
3869                                 }
3870                             }
3871                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3872                             /*
3873                              * Lookupp returns NFSERR_NOENT when we are
3874                              * at the root, so just use the current dir.
3875                              */
3876                             nd->nd_repstat = 0;
3877                             dotdotfileid = dotfileid;
3878                         } else {
3879                             error = nd->nd_repstat;
3880                         }
3881                         m_freem(nd->nd_mrep);
3882                         if (error)
3883                             return (error);
3884                         nd->nd_mrep = NULL;
3885                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3886                         dp->d_pad0 = dp->d_pad1 = 0;
3887                         dp->d_off = 0;
3888                         dp->d_type = DT_DIR;
3889                         dp->d_fileno = dotfileid;
3890                         dp->d_namlen = 1;
3891                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3892                         dp->d_name[0] = '.';
3893                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3894                         /*
3895                          * Just make these offset cookie 0.
3896                          */
3897                         tl = (u_int32_t *)&dp->d_name[8];
3898                         *tl++ = 0;
3899                         *tl = 0;
3900                         blksiz += dp->d_reclen;
3901                         uiop->uio_resid -= dp->d_reclen;
3902                         uiop->uio_offset += dp->d_reclen;
3903                         uiop->uio_iov->iov_base =
3904                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3905                         uiop->uio_iov->iov_len -= dp->d_reclen;
3906                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3907                         dp->d_pad0 = dp->d_pad1 = 0;
3908                         dp->d_off = 0;
3909                         dp->d_type = DT_DIR;
3910                         dp->d_fileno = dotdotfileid;
3911                         dp->d_namlen = 2;
3912                         *((uint64_t *)dp->d_name) = 0;
3913                         dp->d_name[0] = '.';
3914                         dp->d_name[1] = '.';
3915                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3916                         /*
3917                          * Just make these offset cookie 0.
3918                          */
3919                         tl = (u_int32_t *)&dp->d_name[8];
3920                         *tl++ = 0;
3921                         *tl = 0;
3922                         blksiz += dp->d_reclen;
3923                         uiop->uio_resid -= dp->d_reclen;
3924                         uiop->uio_offset += dp->d_reclen;
3925                         uiop->uio_iov->iov_base =
3926                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3927                         uiop->uio_iov->iov_len -= dp->d_reclen;
3928                 }
3929                 NFSREADDIRPLUS_ATTRBIT(&attrbits);
3930                 if (gotmnton)
3931                         NFSSETBIT_ATTRBIT(&attrbits,
3932                             NFSATTRBIT_MOUNTEDONFILEID);
3933                 if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3934                     NFSATTRBIT_TIMECREATE))
3935                         NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE);
3936         }
3937
3938         /*
3939          * Loop around doing readdir rpc's of size nm_readdirsize.
3940          * The stopping criteria is EOF or buffer full.
3941          */
3942         while (more_dirs && bigenough) {
3943                 *attrflagp = 0;
3944                 NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp, cred);
3945                 NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3946                 *tl++ = cookie.lval[0];
3947                 *tl++ = cookie.lval[1];
3948                 if (cookie.qval == 0) {
3949                         *tl++ = 0;
3950                         *tl++ = 0;
3951                 } else {
3952                         NFSLOCKNODE(dnp);
3953                         *tl++ = dnp->n_cookieverf.nfsuquad[0];
3954                         *tl++ = dnp->n_cookieverf.nfsuquad[1];
3955                         NFSUNLOCKNODE(dnp);
3956                 }
3957                 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
3958                 *tl = txdr_unsigned(nmp->nm_readdirsize);
3959                 if (nd->nd_flag & ND_NFSV4) {
3960                         (void) nfsrv_putattrbit(nd, &attrbits);
3961                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3962                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3963                         (void) nfsrv_putattrbit(nd, &dattrbits);
3964                 }
3965                 nanouptime(&ts);
3966                 error = nfscl_request(nd, vp, p, cred);
3967                 if (error)
3968                         return (error);
3969                 if (nd->nd_flag & ND_NFSV3)
3970                         error = nfscl_postop_attr(nd, nap, attrflagp);
3971                 if (nd->nd_repstat || error) {
3972                         if (!error)
3973                                 error = nd->nd_repstat;
3974                         goto nfsmout;
3975                 }
3976                 if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3977                         dctime = nap->na_ctime;
3978                 NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3979                 NFSLOCKNODE(dnp);
3980                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3981                 dnp->n_cookieverf.nfsuquad[1] = *tl++;
3982                 NFSUNLOCKNODE(dnp);
3983                 more_dirs = fxdr_unsigned(int, *tl);
3984                 if (!more_dirs)
3985                         tryformoredirs = 0;
3986
3987                 /* loop through the dir entries, doctoring them to 4bsd form */
3988                 while (more_dirs && bigenough) {
3989                         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3990                         if (nd->nd_flag & ND_NFSV4) {
3991                                 ncookie.lval[0] = *tl++;
3992                                 ncookie.lval[1] = *tl++;
3993                         } else {
3994                                 fileno = fxdr_hyper(tl);
3995                                 tl += 2;
3996                         }
3997                         len = fxdr_unsigned(int, *tl);
3998                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3999                                 error = EBADRPC;
4000                                 goto nfsmout;
4001                         }
4002                         tlen = roundup2(len, 8);
4003                         if (tlen == len)
4004                                 tlen += 8;  /* To ensure null termination. */
4005                         left = DIRBLKSIZ - blksiz;
4006                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
4007                                 NFSBZERO(uiop->uio_iov->iov_base, left);
4008                                 dp->d_reclen += left;
4009                                 uiop->uio_iov->iov_base =
4010                                     (char *)uiop->uio_iov->iov_base + left;
4011                                 uiop->uio_iov->iov_len -= left;
4012                                 uiop->uio_resid -= left;
4013                                 uiop->uio_offset += left;
4014                                 blksiz = 0;
4015                         }
4016                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
4017                             uiop->uio_resid)
4018                                 bigenough = 0;
4019                         if (bigenough) {
4020                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
4021                                 dp->d_pad0 = dp->d_pad1 = 0;
4022                                 dp->d_off = 0;
4023                                 dp->d_namlen = len;
4024                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
4025                                     NFSX_HYPER;
4026                                 dp->d_type = DT_UNKNOWN;
4027                                 blksiz += dp->d_reclen;
4028                                 if (blksiz == DIRBLKSIZ)
4029                                         blksiz = 0;
4030                                 uiop->uio_resid -= DIRHDSIZ;
4031                                 uiop->uio_offset += DIRHDSIZ;
4032                                 uiop->uio_iov->iov_base =
4033                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
4034                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
4035                                 cnp->cn_nameptr = uiop->uio_iov->iov_base;
4036                                 cnp->cn_namelen = len;
4037                                 NFSCNHASHZERO(cnp);
4038                                 error = nfsm_mbufuio(nd, uiop, len);
4039                                 if (error)
4040                                         goto nfsmout;
4041                                 cp = uiop->uio_iov->iov_base;
4042                                 tlen -= len;
4043                                 NFSBZERO(cp, tlen);
4044                                 cp += tlen;     /* points to cookie storage */
4045                                 tl2 = (u_int32_t *)cp;
4046                                 if (len == 2 && cnp->cn_nameptr[0] == '.' &&
4047                                     cnp->cn_nameptr[1] == '.')
4048                                         isdotdot = 1;
4049                                 else
4050                                         isdotdot = 0;
4051                                 uiop->uio_iov->iov_base =
4052                                     (char *)uiop->uio_iov->iov_base + tlen +
4053                                     NFSX_HYPER;
4054                                 uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
4055                                 uiop->uio_resid -= tlen + NFSX_HYPER;
4056                                 uiop->uio_offset += (tlen + NFSX_HYPER);
4057                         } else {
4058                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
4059                                 if (error)
4060                                         goto nfsmout;
4061                         }
4062                         nfhp = NULL;
4063                         if (nd->nd_flag & ND_NFSV3) {
4064                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
4065                                 ncookie.lval[0] = *tl++;
4066                                 ncookie.lval[1] = *tl++;
4067                                 attrflag = fxdr_unsigned(int, *tl);
4068                                 if (attrflag) {
4069                                   error = nfsm_loadattr(nd, &nfsva);
4070                                   if (error)
4071                                         goto nfsmout;
4072                                 }
4073                                 NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
4074                                 if (*tl) {
4075                                         error = nfsm_getfh(nd, &nfhp);
4076                                         if (error)
4077                                             goto nfsmout;
4078                                 }
4079                                 if (!attrflag && nfhp != NULL) {
4080                                         free(nfhp, M_NFSFH);
4081                                         nfhp = NULL;
4082                                 }
4083                         } else {
4084                                 rderr = 0;
4085                                 nfsva.na_mntonfileno = 0xffffffff;
4086                                 error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
4087                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
4088                                     NULL, NULL, &rderr, p, cred);
4089                                 if (error)
4090                                         goto nfsmout;
4091                         }
4092
4093                         if (bigenough) {
4094                             if (nd->nd_flag & ND_NFSV4) {
4095                                 if (rderr) {
4096                                     dp->d_fileno = 0;
4097                                 } else if (gotmnton) {
4098                                     if (nfsva.na_mntonfileno != 0xffffffff)
4099                                         dp->d_fileno = nfsva.na_mntonfileno;
4100                                     else
4101                                         dp->d_fileno = nfsva.na_fileid;
4102                                 } else if (nfsva.na_filesid[0] ==
4103                                     dnp->n_vattr.na_filesid[0] &&
4104                                     nfsva.na_filesid[1] ==
4105                                     dnp->n_vattr.na_filesid[1]) {
4106                                     dp->d_fileno = nfsva.na_fileid;
4107                                 } else {
4108                                     do {
4109                                         fakefileno--;
4110                                     } while (fakefileno ==
4111                                         nfsva.na_fileid);
4112                                     dp->d_fileno = fakefileno;
4113                                 }
4114                             } else {
4115                                 dp->d_fileno = fileno;
4116                             }
4117                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
4118                                 ncookie.lval[0];
4119                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
4120                                 ncookie.lval[1];
4121
4122                             if (nfhp != NULL) {
4123                                 attr_ok = true;
4124                                 if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
4125                                     dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
4126                                     VREF(vp);
4127                                     newvp = vp;
4128                                     unlocknewvp = 0;
4129                                     free(nfhp, M_NFSFH);
4130                                     np = dnp;
4131                                 } else if (isdotdot != 0) {
4132                                     /*
4133                                      * Skip doing a nfscl_nget() call for "..".
4134                                      * There's a race between acquiring the nfs
4135                                      * node here and lookups that look for the
4136                                      * directory being read (in the parent).
4137                                      * It would try to get a lock on ".." here,
4138                                      * owning the lock on the directory being
4139                                      * read. Lookup will hold the lock on ".."
4140                                      * and try to acquire the lock on the
4141                                      * directory being read.
4142                                      * If the directory is unlocked/relocked,
4143                                      * then there is a LOR with the buflock
4144                                      * vp is relocked.
4145                                      */
4146                                     free(nfhp, M_NFSFH);
4147                                 } else {
4148                                     error = nfscl_nget(vp->v_mount, vp,
4149                                       nfhp, cnp, p, &np, LK_EXCLUSIVE);
4150                                     if (!error) {
4151                                         newvp = NFSTOV(np);
4152                                         unlocknewvp = 1;
4153                                         /*
4154                                          * If n_localmodtime >= time before RPC,
4155                                          * then a file modification operation,
4156                                          * such as VOP_SETATTR() of size, has
4157                                          * occurred while the Lookup RPC and
4158                                          * acquisition of the vnode happened. As
4159                                          * such, the attributes might be stale,
4160                                          * with possibly an incorrect size.
4161                                          */
4162                                         NFSLOCKNODE(np);
4163                                         if (timespecisset(
4164                                             &np->n_localmodtime) &&
4165                                             timespeccmp(&np->n_localmodtime,
4166                                             &ts, >=)) {
4167                                             NFSCL_DEBUG(4, "nfsrpc_readdirplus:"
4168                                                 " localmod stale attributes\n");
4169                                             attr_ok = false;
4170                                         }
4171                                         NFSUNLOCKNODE(np);
4172                                     }
4173                                 }
4174                                 nfhp = NULL;
4175                                 if (newvp != NULLVP) {
4176                                     if (attr_ok)
4177                                         error = nfscl_loadattrcache(&newvp,
4178                                             &nfsva, NULL, 0, 0);
4179                                     if (error) {
4180                                         if (unlocknewvp)
4181                                             vput(newvp);
4182                                         else
4183                                             vrele(newvp);
4184                                         goto nfsmout;
4185                                     }
4186                                     dp->d_type =
4187                                         vtonfs_dtype(np->n_vattr.na_type);
4188                                     ndp->ni_vp = newvp;
4189                                     NFSCNHASH(cnp, HASHINIT);
4190                                     if (cnp->cn_namelen <= NCHNAMLEN &&
4191                                         ndp->ni_dvp != ndp->ni_vp &&
4192                                         (newvp->v_type != VDIR ||
4193                                          dctime.tv_sec != 0)) {
4194                                         cache_enter_time_flags(ndp->ni_dvp,
4195                                             ndp->ni_vp, cnp,
4196                                             &nfsva.na_ctime,
4197                                             newvp->v_type != VDIR ? NULL :
4198                                             &dctime, VFS_CACHE_DROPOLD);
4199                                     }
4200                                     if (unlocknewvp)
4201                                         vput(newvp);
4202                                     else
4203                                         vrele(newvp);
4204                                     newvp = NULLVP;
4205                                 }
4206                             }
4207                         } else if (nfhp != NULL) {
4208                             free(nfhp, M_NFSFH);
4209                         }
4210                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4211                         more_dirs = fxdr_unsigned(int, *tl);
4212                 }
4213                 /*
4214                  * If at end of rpc data, get the eof boolean
4215                  */
4216                 if (!more_dirs) {
4217                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4218                         eof = fxdr_unsigned(int, *tl);
4219                         if (tryformoredirs)
4220                                 more_dirs = !eof;
4221                         if (nd->nd_flag & ND_NFSV4) {
4222                                 error = nfscl_postop_attr(nd, nap, attrflagp);
4223                                 if (error)
4224                                         goto nfsmout;
4225                         }
4226                 }
4227                 m_freem(nd->nd_mrep);
4228                 nd->nd_mrep = NULL;
4229         }
4230         /*
4231          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
4232          * by increasing d_reclen for the last record.
4233          */
4234         if (blksiz > 0) {
4235                 left = DIRBLKSIZ - blksiz;
4236                 NFSBZERO(uiop->uio_iov->iov_base, left);
4237                 dp->d_reclen += left;
4238                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4239                     left;
4240                 uiop->uio_iov->iov_len -= left;
4241                 uiop->uio_resid -= left;
4242                 uiop->uio_offset += left;
4243         }
4244
4245         /*
4246          * If returning no data, assume end of file.
4247          * If not bigenough, return not end of file, since you aren't
4248          *    returning all the data
4249          * Otherwise, return the eof flag from the server.
4250          */
4251         if (eofp != NULL) {
4252                 if (tresid == uiop->uio_resid)
4253                         *eofp = 1;
4254                 else if (!bigenough)
4255                         *eofp = 0;
4256                 else
4257                         *eofp = eof;
4258         }
4259
4260         /*
4261          * Add extra empty records to any remaining DIRBLKSIZ chunks.
4262          */
4263         while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
4264                 dp = (struct dirent *)uiop->uio_iov->iov_base;
4265                 NFSBZERO(dp, DIRBLKSIZ);
4266                 dp->d_type = DT_UNKNOWN;
4267                 tl = (u_int32_t *)&dp->d_name[4];
4268                 *tl++ = cookie.lval[0];
4269                 *tl = cookie.lval[1];
4270                 dp->d_reclen = DIRBLKSIZ;
4271                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4272                     DIRBLKSIZ;
4273                 uiop->uio_iov->iov_len -= DIRBLKSIZ;
4274                 uiop->uio_resid -= DIRBLKSIZ;
4275                 uiop->uio_offset += DIRBLKSIZ;
4276         }
4277
4278 nfsmout:
4279         if (nd->nd_mrep != NULL)
4280                 m_freem(nd->nd_mrep);
4281         return (error);
4282 }
4283
4284 /*
4285  * Nfs commit rpc
      *
      * Sends a Commit request for vp, passing the 64 bit offset and the
      * 32 bit cnt as the arguments.  For NFSv4 a Getattr operation is
      * appended to the same request.
      *
      * *attrflagp is cleared on entry; nap and attrflagp are then handed to
      * nfscl_wcc_data() and, for NFSv4, nfscl_postop_attr() to be filled in
      * from the reply.
      *
      * The verifier in the reply is compared with the one cached in the
      * mount point (nm_verf).  When they differ the cached copy is replaced
      * and NFSERR_STALEWRITEVERF is returned.
      *
      * Returns 0 on success, otherwise the error from the request or reply
      * parsing, or the status carried in nd_repstat.
4286  */
4287 int
4288 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
4289     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4290 {
4291         u_int32_t *tl;
4292         struct nfsrv_descript nfsd, *nd = &nfsd;
4293         nfsattrbit_t attrbits;
4294         int error;
4295         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4296
4297         *attrflagp = 0;
4298         NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp, cred);
             /* Arguments: the 64 bit offset (two words) followed by the count. */
4299         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
4300         txdr_hyper(offset, tl);
4301         tl += 2;
4302         *tl = txdr_unsigned(cnt);
4303         if (nd->nd_flag & ND_NFSV4) {
4304                 /*
4305                  * And do a Getattr op.
4306                  */
4307                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4308                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
4309                 NFSGETATTR_ATTRBIT(&attrbits);
4310                 (void) nfsrv_putattrbit(nd, &attrbits);
4311         }
4312         error = nfscl_request(nd, vp, p, cred);
             /*
              * NOTE(review): nd_mrep is not freed on this early return;
              * presumably nfscl_request() leaves no reply behind when it
              * fails -- confirm.
              */
4313         if (error)
4314                 return (error);
4315         error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, NULL);
4316         if (!error && !nd->nd_repstat) {
4317                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
                     /*
                      * The compare and update of nm_verf are done with the
                      * mount point locked.  A changed verifier is remembered
                      * and reported through nd_repstat (presumably so the
                      * caller can redo uncommitted writes -- confirm).
                      */
4318                 NFSLOCKMNT(nmp);
4319                 if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
4320                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
4321                         nd->nd_repstat = NFSERR_STALEWRITEVERF;
4322                 }
4323                 NFSUNLOCKMNT(nmp);
4324                 if (nd->nd_flag & ND_NFSV4)
4325                         error = nfscl_postop_attr(nd, nap, attrflagp);
4326         }
     /*
      * NOTE(review): no goto to this label is visible in the function; it is
      * presumably the failure target of NFSM_DISSECT -- confirm against the
      * macro definition.
      */
4327 nfsmout:
             /* A reply status is only reported when no local error occurred. */
4328         if (!error && nd->nd_repstat)
4329                 error = nd->nd_repstat;
4330         m_freem(nd->nd_mrep);
4331         return (error);
4332 }
4333
4334 /*
4335  * NFS byte range lock rpc.
4336  * (Mostly just calls one of the three lower level RPC routines.)
      *
      * vp      - vnode of the file being locked
      * size    - added to fl->l_start when fl->l_whence is SEEK_END
      * op      - F_GETLK, F_UNLCK (with fl->l_type == F_UNLCK) or F_SETLK;
      *           anything else yields EINVAL
      * fl      - the lock request; for F_GETLK it is passed on to
      *           nfscl_lockt()/nfsrpc_lockt(), which may update it
      * reclaim - passed through to nfsrpc_lock()
      * cred, p - passed through to the lower level routines
      * id, flags - passed through to the lock owner handling routines
      *
      * Returns EINVAL for an unknown l_whence, a negative start, a range
      * whose end lies before its start, an unsupported op, or a F_SETLK of
      * a partial range when the open does not have nfso_posixlock set.
      * Returns EIO when an error remains after four expiry retries.
      * Otherwise returns the error (or reply status) of the routine called.
4337  */
4338 int
4339 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
4340     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4341 {
4342         struct nfscllockowner *lp;
4343         struct nfsclclient *clp;
4344         struct nfsfh *nfhp;
4345         struct nfsrv_descript nfsd, *nd = &nfsd;
4346         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4347         u_int64_t off, len;
4348         off_t start, end;
4349         u_int32_t clidrev = 0;
4350         int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
4351         int callcnt, dorpc;
4352
4353         /*
4354          * Convert the flock structure into a start and end and do POSIX
4355          * bounds checking.
4356          */
4357         switch (fl->l_whence) {
4358         case SEEK_SET:
4359         case SEEK_CUR:
4360                 /*
4361                  * Caller is responsible for adding any necessary offset
4362                  * when SEEK_CUR is used.
4363                  */
4364                 start = fl->l_start;
4365                 off = fl->l_start;
4366                 break;
4367         case SEEK_END:
4368                 start = size + fl->l_start;
4369                 off = size + fl->l_start;
4370                 break;
4371         default:
4372                 return (EINVAL);
4373         }
4374         if (start < 0)
4375                 return (EINVAL);
             /*
              * end is only computed for this sanity check (it catches, for
              * example, a negative l_len); it is not used afterwards.
              */
4376         if (fl->l_len != 0) {
4377                 end = start + fl->l_len - 1;
4378                 if (end < start)
4379                         return (EINVAL);
4380         }
4381
             /*
              * A zero l_len is sent as NFS64BITSSET.  Together with
              * off == 0 the code below treats that as the entire file.
              */
4382         len = fl->l_len;
4383         if (len == 0)
4384                 len = NFS64BITSSET;
4385         retrycnt = 0;
4386         do {
4387             nd->nd_repstat = 0;
4388             if (op == F_GETLK) {
4389                 error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4390                 if (error)
4391                         return (error);
                     /*
                      * The LockT RPC is only done when nfscl_lockt() returns 0.
                      * A return of -1 skips the RPC and is mapped to success
                      * (presumably the answer was found locally -- confirm).
                      */
4392                 error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
4393                 if (!error) {
4394                         clidrev = clp->nfsc_clientidrev;
4395                         error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
4396                             p, id, flags);
4397                 } else if (error == -1) {
4398                         error = 0;
4399                 }
4400                 nfscl_clientrelease(clp);
4401             } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
4402                 /*
4403                  * We must loop around for all lockowner cases.
4404                  */
4405                 callcnt = 0;
4406                 error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4407                 if (error)
4408                         return (error);
4409                 do {
4410                     error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
4411                         clp, id, flags, &lp, &dorpc);
4412                     /*
4413                      * If it returns a NULL lp, we're done.
                          * On the first pass only the client is released; on
                          * later passes nfscl_releasealllocks() is called
                          * instead.
4414                      */
4415                     if (lp == NULL) {
4416                         if (callcnt == 0)
4417                             nfscl_clientrelease(clp);
4418                         else
4419                             nfscl_releasealllocks(clp, vp, p, id, flags);
4420                         return (error);
4421                     }
4422                     if (nmp->nm_clp != NULL)
4423                         clidrev = nmp->nm_clp->nfsc_clientidrev;
4424                     else
4425                         clidrev = 0;
4426                     /*
4427                      * If the server doesn't support Posix lock semantics,
4428                      * only allow locks on the entire file, since it won't
4429                      * handle overlapping byte ranges.
4430                      * There might still be a problem when a lock
4431                      * upgrade/downgrade (read<->write) occurs, since the
4432                      * server "might" expect an unlock first?
4433                      */
4434                     if (dorpc && (lp->nfsl_open->nfso_posixlock ||
4435                         (off == 0 && len == NFS64BITSSET))) {
4436                         /*
4437                          * Since the lock records will go away, we must
4438                          * wait for grace and delay here.
4439                          */
4440                         do {
4441                             error = nfsrpc_locku(nd, nmp, lp, off, len,
4442                                 NFSV4LOCKT_READ, cred, p, 0);
4443                             if ((nd->nd_repstat == NFSERR_GRACE ||
4444                                  nd->nd_repstat == NFSERR_DELAY) &&
4445                                 error == 0)
4446                                 (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4447                                     "nfs_advlock");
4448                         } while ((nd->nd_repstat == NFSERR_GRACE ||
4449                             nd->nd_repstat == NFSERR_DELAY) && error == 0);
4450                     }
4451                     callcnt++;
4452                 } while (error == 0 && nd->nd_repstat == 0);
4453                 nfscl_releasealllocks(clp, vp, p, id, flags);
4454             } else if (op == F_SETLK) {
4455                 error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
4456                     NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
                     /* No RPC is done on failure or when donelocally is set. */
4457                 if (error || donelocally) {
4458                         return (error);
4459                 }
4460                 if (nmp->nm_clp != NULL)
4461                         clidrev = nmp->nm_clp->nfsc_clientidrev;
4462                 else
4463                         clidrev = 0;
4464                 nfhp = VTONFS(vp)->n_fhp;
                     /*
                      * Without nfso_posixlock only a lock on the entire file
                      * (off == 0 and len == NFS64BITSSET) is sent.
                      */
4465                 if (!lp->nfsl_open->nfso_posixlock &&
4466                     (off != 0 || len != NFS64BITSSET)) {
4467                         error = EINVAL;
4468                 } else {
4469                         error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
4470                             nfhp->nfh_len, lp, newone, reclaim, off,
4471                             len, fl->l_type, cred, p, 0);
4472                 }
                     /*
                      * Fold the reply status into error before the release so
                      * that nfscl_lockrelease() is given the final outcome.
                      */
4473                 if (!error)
4474                         error = nd->nd_repstat;
4475                 nfscl_lockrelease(lp, error, newone);
4476             } else {
4477                 error = EINVAL;
4478             }
4479             if (!error)
4480                 error = nd->nd_repstat;
                 /*
                  * The first group of errors sleeps in nfs_catnap() and is then
                  * retried by the loop condition below without a retry limit.
                  * Expired/bad stateid errors call nfscl_hasexpired() and are
                  * retried only while it returns 0, at most four times.
                  */
4481             if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
4482                 error == NFSERR_STALEDONTRECOVER ||
4483                 error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4484                 error == NFSERR_BADSESSION) {
4485                 (void) nfs_catnap(PZERO, error, "nfs_advlock");
4486             } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
4487                 && clidrev != 0) {
4488                 expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
4489                 retrycnt++;
4490             }
4491         } while (error == NFSERR_GRACE ||
4492             error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4493             error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
4494             error == NFSERR_BADSESSION ||
4495             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
4496              expireret == 0 && clidrev != 0 && retrycnt < 4));
             /* Any error still pending once the retries are used up becomes EIO. */
4497         if (error && retrycnt >= 4)
4498                 error = EIO;
4499         return (error);
4500 }
4501
4502 /*
4503  * The lower level routine for the LockT case.
      *
      * Builds and sends a LockT request for the byte range off/len on vp and
      * updates *fl from the reply:
      * - reply status 0: fl->l_type is set to F_UNLCK
      * - NFSERR_DENIED: the status is cleared and fl is filled in with the
      *   conflicting lock's start, length and type; l_whence is set to
      *   SEEK_SET and l_pid to 0
      * - NFSERR_STALECLIENTID: nfscl_initiate_recovery() is called for clp
      *
      * id and flags are passed to nfscl_filllockowner() to build the lock
      * owner name.
      *
      * Returns the error from nfscl_request() or from parsing the reply.
      * Any other reply status is left in nd->nd_repstat and is not folded
      * into the return value here.
4504  */
4505 int
4506 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
4507     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
4508     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4509 {
4510         u_int32_t *tl;
4511         int error, type, size;
4512         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4513         struct nfsnode *np;
4514         struct nfsmount *nmp;
4515         struct nfsclsession *tsep;
4516
4517         nmp = VFSTONFS(vp->v_mount);
4518         NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp, cred);
             /*
              * Seven words: lock type, offset (2), length (2) and the
              * client id (2) taken from the session returned by
              * nfsmnt_mdssession().
              */
4519         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4520         if (fl->l_type == F_RDLCK)
4521                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4522         else
4523                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4524         txdr_hyper(off, tl);
4525         tl += 2;
4526         txdr_hyper(len, tl);
4527         tl += 2;
4528         tsep = nfsmnt_mdssession(nmp);
4529         *tl++ = tsep->nfsess_clientid.lval[0];
4530         *tl = tsep->nfsess_clientid.lval[1];
             /*
              * The owner string sent is the name filled in by
              * nfscl_filllockowner() with the file handle appended at offset
              * NFSV4CL_LOCKNAMELEN.
              * NOTE(review): assumes nfh_len <= NFSX_V4FHMAX, which is the
              * room left in own[] -- confirm.
              */
4531         nfscl_filllockowner(id, own, flags);
4532         np = VTONFS(vp);
4533         NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
4534             np->n_fhp->nfh_len);
4535         (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
4536         error = nfscl_request(nd, vp, p, cred);
4537         if (error)
4538                 return (error);
4539         if (nd->nd_repstat == 0) {
4540                 fl->l_type = F_UNLCK;
4541         } else if (nd->nd_repstat == NFSERR_DENIED) {
4542                 nd->nd_repstat = 0;
4543                 fl->l_whence = SEEK_SET;
                     /*
                      * Eight words: offset (2), length (2), lock type, two
                      * words that are skipped and the length of the opaque
                      * owner that follows.
                      */
4544                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4545                 fl->l_start = fxdr_hyper(tl);
4546                 tl += 2;
4547                 len = fxdr_hyper(tl);
4548                 tl += 2;
                     /* NFS64BITSSET maps back to an l_len of 0. */
4549                 if (len == NFS64BITSSET)
4550                         fl->l_len = 0;
4551                 else
4552                         fl->l_len = len;
4553                 type = fxdr_unsigned(int, *tl++);
4554                 if (type == NFSV4LOCKT_WRITE)
4555                         fl->l_type = F_WRLCK;
4556                 else
4557                         fl->l_type = F_RDLCK;
4558                 /*
4559                  * XXX For now, I have no idea what to do with the
4560                  * conflicting lock_owner, so I'll just set the pid == 0
4561                  * and skip over the lock_owner.
4562                  */
4563                 fl->l_pid = (pid_t)0;
                     /*
                      * Presumably the two skipped words are the client id of
                      * the conflicting lock_owner -- confirm.
                      */
4564                 tl += 2;
4565                 size = fxdr_unsigned(int, *tl);
                     /* Reject an owner length outside 0..NFSV4_OPAQUELIMIT. */
4566                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4567                         error = EBADRPC;
4568                 if (!error)
4569                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4570         } else if (nd->nd_repstat == NFSERR_STALECLIENTID)
4571                 nfscl_initiate_recovery(clp);
     /*
      * NOTE(review): no goto to this label is visible in the function; it is
      * presumably the failure target of NFSM_DISSECT -- confirm against the
      * macro definition.
      */
4572 nfsmout:
4573         m_freem(nd->nd_mrep);
4574         return (error);
4575 }
4576
4577 /*
4578  * Lower level function that performs the LockU RPC.
4579  */
4580 static int
4581 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4582     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4583     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4584 {
4585         u_int32_t *tl;
4586         int error;
4587
4588         nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4589             lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0, cred);
4590         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4591         *tl++ = txdr_unsigned(type);
4592         *tl = txdr_unsigned(lp->nfsl_seqid);
4593         if (nfstest_outofseq &&
4594             (arc4random() % nfstest_outofseq) == 0)
4595                 *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4596         tl++;
4597         if (NFSHASNFSV4N(nmp))
4598                 *tl++ = 0;
4599         else
4600                 *tl++ = lp->nfsl_stateid.seqid;
4601         *tl++ = lp->nfsl_stateid.other[0];
4602         *tl++ = lp->nfsl_stateid.other[1];
4603         *tl++ = lp->nfsl_stateid.other[2];
4604         txdr_hyper(off, tl);
4605         tl += 2;
4606         txdr_hyper(len, tl);
4607         if (syscred)
4608                 nd->nd_flag |= ND_USEGSSNAME;
4609         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4610             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4611         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4612         if (error)
4613                 return (error);
4614         if (nd->nd_repstat == 0) {
4615                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4616                 lp->nfsl_stateid.seqid = *tl++;
4617                 lp->nfsl_stateid.other[0] = *tl++;
4618                 lp->nfsl_stateid.other[1] = *tl++;
4619                 lp->nfsl_stateid.other[2] = *tl;
4620         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4621                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4622 nfsmout:
4623         m_freem(nd->nd_mrep);
4624         return (error);
4625 }
4626
4627 /*
4628  * The actual Lock RPC.
4629  */
4630 int
4631 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4632     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4633     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4634     NFSPROC_T *p, int syscred)
4635 {
4636         u_int32_t *tl;
4637         int error, size;
4638         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4639         struct nfsclsession *tsep;
4640
4641         nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
4642             cred);
4643         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4644         if (type == F_RDLCK)
4645                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4646         else
4647                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4648         *tl++ = txdr_unsigned(reclaim);
4649         txdr_hyper(off, tl);
4650         tl += 2;
4651         txdr_hyper(len, tl);
4652         tl += 2;
4653         if (newone) {
4654             *tl = newnfs_true;
4655             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4656                 2 * NFSX_UNSIGNED + NFSX_HYPER);
4657             *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4658             if (NFSHASNFSV4N(nmp))
4659                 *tl++ = 0;
4660             else
4661                 *tl++ = lp->nfsl_open->nfso_stateid.seqid;
4662             *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4663             *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4664             *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4665             *tl++ = txdr_unsigned(lp->nfsl_seqid);
4666             tsep = nfsmnt_mdssession(nmp);
4667             *tl++ = tsep->nfsess_clientid.lval[0];
4668             *tl = tsep->nfsess_clientid.lval[1];
4669             NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4670             NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4671             (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4672         } else {
4673             *tl = newnfs_false;
4674             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4675             if (NFSHASNFSV4N(nmp))
4676                 *tl++ = 0;
4677             else
4678                 *tl++ = lp->nfsl_stateid.seqid;
4679             *tl++ = lp->nfsl_stateid.other[0];
4680             *tl++ = lp->nfsl_stateid.other[1];
4681             *tl++ = lp->nfsl_stateid.other[2];
4682             *tl = txdr_unsigned(lp->nfsl_seqid);
4683             if (nfstest_outofseq &&
4684                 (arc4random() % nfstest_outofseq) == 0)
4685                     *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4686         }
4687         if (syscred)
4688                 nd->nd_flag |= ND_USEGSSNAME;
4689         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4690             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4691         if (error)
4692                 return (error);
4693         if (newone)
4694             NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4695         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4696         if (nd->nd_repstat == 0) {
4697                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4698                 lp->nfsl_stateid.seqid = *tl++;
4699                 lp->nfsl_stateid.other[0] = *tl++;
4700                 lp->nfsl_stateid.other[1] = *tl++;
4701                 lp->nfsl_stateid.other[2] = *tl;
4702         } else if (nd->nd_repstat == NFSERR_DENIED) {
4703                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4704                 size = fxdr_unsigned(int, *(tl + 7));
4705                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4706                         error = EBADRPC;
4707                 if (!error)
4708                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4709         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4710                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4711 nfsmout:
4712         m_freem(nd->nd_mrep);
4713         return (error);
4714 }
4715
4716 /*
4717  * nfs statfs rpc
4718  * (always called with the vp for the mount point)
4719  */
4720 int
4721 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4722     uint32_t *leasep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap,
4723     int *attrflagp)
4724 {
4725         u_int32_t *tl = NULL;
4726         struct nfsrv_descript nfsd, *nd = &nfsd;
4727         struct nfsmount *nmp;
4728         nfsattrbit_t attrbits;
4729         int error;
4730
4731         *attrflagp = 0;
4732         nmp = VFSTONFS(vp->v_mount);
4733         if (NFSHASNFSV4(nmp)) {
4734                 /*
4735                  * For V4, you actually do a getattr.
4736                  */
4737                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
4738                 if (leasep != NULL)
4739                         NFSROOTFS_GETATTRBIT(&attrbits);
4740                 else
4741                         NFSSTATFS_GETATTRBIT(&attrbits);
4742                 (void) nfsrv_putattrbit(nd, &attrbits);
4743                 nd->nd_flag |= ND_USEGSSNAME;
4744                 error = nfscl_request(nd, vp, p, cred);
4745                 if (error)
4746                         return (error);
4747                 if (nd->nd_repstat == 0) {
4748                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4749                             NULL, NULL, sbp, fsp, NULL, 0, NULL, leasep, NULL,
4750                             p, cred);
4751                         if (!error) {
4752                                 nmp->nm_fsid[0] = nap->na_filesid[0];
4753                                 nmp->nm_fsid[1] = nap->na_filesid[1];
4754                                 NFSSETHASSETFSID(nmp);
4755                                 *attrflagp = 1;
4756                         }
4757                 } else {
4758                         error = nd->nd_repstat;
4759                 }
4760                 if (error)
4761                         goto nfsmout;
4762         } else {
4763                 NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp, NULL);
4764                 error = nfscl_request(nd, vp, p, cred);
4765                 if (error)
4766                         return (error);
4767                 if (nd->nd_flag & ND_NFSV3) {
4768                         error = nfscl_postop_attr(nd, nap, attrflagp);
4769                         if (error)
4770                                 goto nfsmout;
4771                 }
4772                 if (nd->nd_repstat) {
4773                         error = nd->nd_repstat;
4774                         goto nfsmout;
4775                 }
4776                 NFSM_DISSECT(tl, u_int32_t *,
4777                     NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4778         }
4779         if (NFSHASNFSV3(nmp)) {
4780                 sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4781                 sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4782                 sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4783                 sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4784                 sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4785                 sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4786                 sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4787         } else if (NFSHASNFSV4(nmp) == 0) {
4788                 sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4789                 sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4790                 sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4791                 sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4792                 sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4793         }
4794 nfsmout:
4795         m_freem(nd->nd_mrep);
4796         return (error);
4797 }
4798
4799 /*
4800  * nfs pathconf rpc
4801  */
4802 int
4803 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4804     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4805 {
4806         struct nfsrv_descript nfsd, *nd = &nfsd;
4807         struct nfsmount *nmp;
4808         u_int32_t *tl;
4809         nfsattrbit_t attrbits;
4810         int error;
4811         struct nfsnode *np;
4812
4813         *attrflagp = 0;
4814         nmp = VFSTONFS(vp->v_mount);
4815         if (NFSHASNFSV4(nmp)) {
4816                 np = VTONFS(vp);
4817                 if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
4818                     nmp->nm_fhsize == 0) {
4819                         /* Attempt to get the actual root file handle. */
4820                         error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
4821                             cred, p);
4822                         if (error != 0)
4823                                 return (EACCES);
4824                         if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
4825                                 nfscl_statfs(vp, cred, p);
4826                 }
4827                 /*
4828                  * For V4, you actually do a getattr.
4829                  */
4830                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
4831                 NFSPATHCONF_GETATTRBIT(&attrbits);
4832                 (void) nfsrv_putattrbit(nd, &attrbits);
4833                 nd->nd_flag |= ND_USEGSSNAME;
4834                 error = nfscl_request(nd, vp, p, cred);
4835                 if (error)
4836                         return (error);
4837                 if (nd->nd_repstat == 0) {
4838                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4839                             pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4840                             cred);
4841                         if (!error)
4842                                 *attrflagp = 1;
4843                 } else {
4844                         error = nd->nd_repstat;
4845                 }
4846         } else {
4847                 NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp, NULL);
4848                 error = nfscl_request(nd, vp, p, cred);
4849                 if (error)
4850                         return (error);
4851                 error = nfscl_postop_attr(nd, nap, attrflagp);
4852                 if (nd->nd_repstat && !error)
4853                         error = nd->nd_repstat;
4854                 if (!error) {
4855                         NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4856                         pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4857                         pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4858                         pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4859                         pc->pc_chownrestricted =
4860                             fxdr_unsigned(u_int32_t, *tl++);
4861                         pc->pc_caseinsensitive =
4862                             fxdr_unsigned(u_int32_t, *tl++);
4863                         pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4864                 }
4865         }
4866 nfsmout:
4867         m_freem(nd->nd_mrep);
4868         return (error);
4869 }
4870
4871 /*
4872  * nfs version 3 fsinfo rpc call
4873  */
4874 int
4875 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4876     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4877 {
4878         u_int32_t *tl;
4879         struct nfsrv_descript nfsd, *nd = &nfsd;
4880         int error;
4881
4882         *attrflagp = 0;
4883         NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp, NULL);
4884         error = nfscl_request(nd, vp, p, cred);
4885         if (error)
4886                 return (error);
4887         error = nfscl_postop_attr(nd, nap, attrflagp);
4888         if (nd->nd_repstat && !error)
4889                 error = nd->nd_repstat;
4890         if (!error) {
4891                 NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4892                 fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4893                 fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4894                 fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4895                 fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4896                 fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4897                 fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4898                 fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4899                 fsp->fs_maxfilesize = fxdr_hyper(tl);
4900                 tl += 2;
4901                 fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4902                 tl += 2;
4903                 fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4904         }
4905 nfsmout:
4906         m_freem(nd->nd_mrep);
4907         return (error);
4908 }
4909
4910 /*
4911  * This function performs the Renew RPC.
4912  */
4913 int
4914 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4915     NFSPROC_T *p)
4916 {
4917         u_int32_t *tl;
4918         struct nfsrv_descript nfsd;
4919         struct nfsrv_descript *nd = &nfsd;
4920         struct nfsmount *nmp;
4921         int error;
4922         struct nfssockreq *nrp;
4923         struct nfsclsession *tsep;
4924
4925         nmp = clp->nfsc_nmp;
4926         if (nmp == NULL)
4927                 return (0);
4928         if (dsp == NULL)
4929                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4930                     0, cred);
4931         else
4932                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4933                     &dsp->nfsclds_sess, 0, 0, NULL);
4934         if (!NFSHASNFSV4N(nmp)) {
4935                 /* NFSv4.1 just uses a Sequence Op and not a Renew. */
4936                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4937                 tsep = nfsmnt_mdssession(nmp);
4938                 *tl++ = tsep->nfsess_clientid.lval[0];
4939                 *tl = tsep->nfsess_clientid.lval[1];
4940         }
4941         nrp = NULL;
4942         if (dsp != NULL)
4943                 nrp = dsp->nfsclds_sockp;
4944         if (nrp == NULL)
4945                 /* If NULL, use the MDS socket. */
4946                 nrp = &nmp->nm_sockreq;
4947         nd->nd_flag |= ND_USEGSSNAME;
4948         if (dsp == NULL)
4949                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4950                     NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4951         else {
4952                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4953                     NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4954                 if (error == ENXIO)
4955                         nfscl_cancelreqs(dsp);
4956         }
4957         if (error)
4958                 return (error);
4959         error = nd->nd_repstat;
4960         m_freem(nd->nd_mrep);
4961         return (error);
4962 }
4963
4964 /*
4965  * This function performs the Releaselockowner RPC.
4966  */
4967 int
4968 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4969     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4970 {
4971         struct nfsrv_descript nfsd, *nd = &nfsd;
4972         u_int32_t *tl;
4973         int error;
4974         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4975         struct nfsclsession *tsep;
4976
4977         if (NFSHASNFSV4N(nmp)) {
4978                 /* For NFSv4.1, do a FreeStateID. */
4979                 nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4980                     NULL, 0, 0, cred);
4981                 nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4982         } else {
4983                 nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4984                     NULL, 0, 0, NULL);
4985                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4986                 tsep = nfsmnt_mdssession(nmp);
4987                 *tl++ = tsep->nfsess_clientid.lval[0];
4988                 *tl = tsep->nfsess_clientid.lval[1];
4989                 NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4990                 NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4991                 (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4992         }
4993         nd->nd_flag |= ND_USEGSSNAME;
4994         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4995             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4996         if (error)
4997                 return (error);
4998         error = nd->nd_repstat;
4999         m_freem(nd->nd_mrep);
5000         return (error);
5001 }
5002
5003 /*
5004  * This function performs the Compound to get the mount pt FH.
5005  */
5006 int
5007 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
5008     NFSPROC_T *p)
5009 {
5010         u_int32_t *tl;
5011         struct nfsrv_descript nfsd;
5012         struct nfsrv_descript *nd = &nfsd;
5013         u_char *cp, *cp2, *fhp;
5014         int error, cnt, len, setnil;
5015         u_int32_t *opcntp;
5016
5017         nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
5018             0, NULL);
5019         cp = dirpath;
5020         cnt = 0;
5021         do {
5022                 setnil = 0;
5023                 while (*cp == '/')
5024                         cp++;
5025                 cp2 = cp;
5026                 while (*cp2 != '\0' && *cp2 != '/')
5027                         cp2++;
5028                 if (*cp2 == '/') {
5029                         setnil = 1;
5030                         *cp2 = '\0';
5031                 }
5032                 if (cp2 != cp) {
5033                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
5034                         *tl = txdr_unsigned(NFSV4OP_LOOKUP);
5035                         nfsm_strtom(nd, cp, strlen(cp));
5036                         cnt++;
5037                 }
5038                 if (setnil)
5039                         *cp2++ = '/';
5040                 cp = cp2;
5041         } while (*cp != '\0');
5042         if (NFSHASNFSV4N(nmp))
5043                 /* Has a Sequence Op done by nfscl_reqstart(). */
5044                 *opcntp = txdr_unsigned(3 + cnt);
5045         else
5046                 *opcntp = txdr_unsigned(2 + cnt);
5047         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
5048         *tl = txdr_unsigned(NFSV4OP_GETFH);
5049         nd->nd_flag |= ND_USEGSSNAME;
5050         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5051                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5052         if (error)
5053                 return (error);
5054         if (nd->nd_repstat == 0) {
5055                 NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
5056                 tl += (2 + 2 * cnt);
5057                 if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
5058                         len > NFSX_FHMAX) {
5059                         nd->nd_repstat = NFSERR_BADXDR;
5060                 } else {
5061                         fhp = malloc(len + 1, M_TEMP, M_WAITOK);
5062                         nd->nd_repstat = nfsrv_mtostr(nd, fhp, len);
5063                         if (nd->nd_repstat == 0) {
5064                                 NFSLOCKMNT(nmp);
5065                                 if (nmp->nm_fhsize == 0) {
5066                                         NFSBCOPY(fhp, nmp->nm_fh, len);
5067                                         nmp->nm_fhsize = len;
5068                                 }
5069                                 NFSUNLOCKMNT(nmp);
5070                         }
5071                         free(fhp, M_TEMP);
5072                 }
5073         }
5074         error = nd->nd_repstat;
5075 nfsmout:
5076         m_freem(nd->nd_mrep);
5077         return (error);
5078 }
5079
5080 /*
5081  * This function performs the Delegreturn RPC.
5082  */
5083 int
5084 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
5085     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
5086 {
5087         u_int32_t *tl;
5088         struct nfsrv_descript nfsd;
5089         struct nfsrv_descript *nd = &nfsd;
5090         int error;
5091
5092         nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
5093             dp->nfsdl_fhlen, NULL, NULL, 0, 0, cred);
5094         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
5095         if (NFSHASNFSV4N(nmp))
5096                 *tl++ = 0;
5097         else
5098                 *tl++ = dp->nfsdl_stateid.seqid;
5099         *tl++ = dp->nfsdl_stateid.other[0];
5100         *tl++ = dp->nfsdl_stateid.other[1];
5101         *tl = dp->nfsdl_stateid.other[2];
5102         if (syscred)
5103                 nd->nd_flag |= ND_USEGSSNAME;
5104         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5105             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5106         if (error)
5107                 return (error);
5108         error = nd->nd_repstat;
5109         m_freem(nd->nd_mrep);
5110         return (error);
5111 }
5112
5113 /*
5114  * nfs getacl call.
5115  */
5116 int
5117 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
5118 {
5119         struct nfsrv_descript nfsd, *nd = &nfsd;
5120         int error;
5121         nfsattrbit_t attrbits;
5122         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5123
5124         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
5125                 return (EOPNOTSUPP);
5126         NFSCL_REQSTART(nd, NFSPROC_GETACL, vp, cred);
5127         NFSZERO_ATTRBIT(&attrbits);
5128         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
5129         (void) nfsrv_putattrbit(nd, &attrbits);
5130         error = nfscl_request(nd, vp, p, cred);
5131         if (error)
5132                 return (error);
5133         if (!nd->nd_repstat)
5134                 error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
5135                     NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
5136         else
5137                 error = nd->nd_repstat;
5138         m_freem(nd->nd_mrep);
5139         return (error);
5140 }
5141
5142 /*
5143  * nfs setacl call.
5144  */
5145 int
5146 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
5147 {
5148         int error;
5149         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5150
5151         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
5152                 return (EOPNOTSUPP);
5153         error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL);
5154         return (error);
5155 }
5156
5157 /*
5158  * nfs setacl call.
5159  */
5160 static int
5161 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
5162     struct acl *aclp, nfsv4stateid_t *stateidp)
5163 {
5164         struct nfsrv_descript nfsd, *nd = &nfsd;
5165         int error;
5166         nfsattrbit_t attrbits;
5167         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5168
5169         if (!NFSHASNFSV4(nmp))
5170                 return (EOPNOTSUPP);
5171         NFSCL_REQSTART(nd, NFSPROC_SETACL, vp, cred);
5172         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
5173         NFSZERO_ATTRBIT(&attrbits);
5174         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
5175         (void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
5176             &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
5177         error = nfscl_request(nd, vp, p, cred);
5178         if (error)
5179                 return (error);
5180         /* Don't care about the pre/postop attributes */
5181         m_freem(nd->nd_mrep);
5182         return (nd->nd_repstat);
5183 }
5184
5185 /*
5186  * Do the NFSv4.1 Exchange ID.
5187  */
5188 int
5189 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
5190     struct nfssockreq *nrp, int minorvers, uint32_t exchflags,
5191     struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p)
5192 {
5193         uint32_t *tl, v41flags;
5194         struct nfsrv_descript nfsd;
5195         struct nfsrv_descript *nd = &nfsd;
5196         struct nfsclds *dsp;
5197         struct timespec verstime;
5198         int error, len;
5199
5200         *dspp = NULL;
5201         if (minorvers == 0)
5202                 minorvers = nmp->nm_minorvers;
5203         nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL,
5204             NFS_VER4, minorvers, NULL);
5205         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5206         *tl++ = txdr_unsigned(nfsboottime.tv_sec);      /* Client owner */
5207         *tl = txdr_unsigned(clp->nfsc_rev);
5208         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
5209
5210         NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
5211         *tl++ = txdr_unsigned(exchflags);
5212         *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
5213
5214         /* Set the implementation id4 */
5215         *tl = txdr_unsigned(1);
5216         (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
5217         (void) nfsm_strtom(nd, version, strlen(version));
5218         NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
5219         verstime.tv_sec = 1293840000;           /* Jan 1, 2011 */
5220         verstime.tv_nsec = 0;
5221         txdr_nfsv4time(&verstime, tl);
5222         nd->nd_flag |= ND_USEGSSNAME;
5223         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
5224             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5225         NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
5226             (int)nd->nd_repstat);
5227         if (error != 0)
5228                 return (error);
5229         if (nd->nd_repstat == 0) {
5230                 NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
5231                 len = fxdr_unsigned(int, *(tl + 7));
5232                 if (len < 0 || len > NFSV4_OPAQUELIMIT) {
5233                         error = NFSERR_BADXDR;
5234                         goto nfsmout;
5235                 }
5236                 dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
5237                     M_WAITOK | M_ZERO);
5238                 dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
5239                 dsp->nfsclds_servownlen = len;
5240                 dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
5241                 dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
5242                 dsp->nfsclds_sess.nfsess_sequenceid =
5243                     fxdr_unsigned(uint32_t, *tl++);
5244                 v41flags = fxdr_unsigned(uint32_t, *tl);
5245                 if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
5246                     NFSHASPNFSOPT(nmp)) {
5247                         NFSCL_DEBUG(1, "set PNFS\n");
5248                         NFSLOCKMNT(nmp);
5249                         nmp->nm_state |= NFSSTA_PNFS;
5250                         NFSUNLOCKMNT(nmp);
5251                         dsp->nfsclds_flags |= NFSCLDS_MDS;
5252                 }
5253                 if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
5254                         dsp->nfsclds_flags |= NFSCLDS_DS;
5255                 if (minorvers == NFSV42_MINORVERSION)
5256                         dsp->nfsclds_flags |= NFSCLDS_MINORV2;
5257                 if (len > 0)
5258                         nd->nd_repstat = nfsrv_mtostr(nd,
5259                             dsp->nfsclds_serverown, len);
5260                 if (nd->nd_repstat == 0) {
5261                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5262                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5263                             NULL, MTX_DEF);
5264                         nfscl_initsessionslots(&dsp->nfsclds_sess);
5265                         *dspp = dsp;
5266                 } else
5267                         free(dsp, M_NFSCLDS);
5268         }
5269         error = nd->nd_repstat;
5270 nfsmout:
5271         m_freem(nd->nd_mrep);
5272         return (error);
5273 }
5274
5275 /*
5276  * Do the NFSv4.1 Create Session.
5277  */
5278 int
5279 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
5280     struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds,
5281     struct ucred *cred, NFSPROC_T *p)
5282 {
5283         uint32_t crflags, maxval, *tl;
5284         struct nfsrv_descript nfsd;
5285         struct nfsrv_descript *nd = &nfsd;
5286         int error, irdcnt, minorvers;
5287
5288         /* Make sure nm_rsize, nm_wsize is set. */
5289         if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
5290                 nmp->nm_rsize = NFS_MAXBSIZE;
5291         if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
5292                 nmp->nm_wsize = NFS_MAXBSIZE;
5293         if (dsp == NULL)
5294                 minorvers = nmp->nm_minorvers;
5295         else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0)
5296                 minorvers = NFSV42_MINORVERSION;
5297         else
5298                 minorvers = NFSV41_MINORVERSION;
5299         nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL,
5300             NFS_VER4, minorvers, NULL);
5301         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5302         *tl++ = sep->nfsess_clientid.lval[0];
5303         *tl++ = sep->nfsess_clientid.lval[1];
5304         *tl++ = txdr_unsigned(sequenceid);
5305         crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
5306         if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
5307                 crflags |= NFSV4CRSESS_CONNBACKCHAN;
5308         *tl = txdr_unsigned(crflags);
5309
5310         /* Fill in fore channel attributes. */
5311         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5312         *tl++ = 0;                              /* Header pad size */
5313         if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >=
5314             nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) {
5315                 /*
5316                  * NFSv4.2 Extended Attribute operations may want to do
5317                  * requests/replies that are larger than nm_rsize/nm_wsize.
5318                  */
5319                 *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5320                 *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5321         } else {
5322                 *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);
5323                 *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);
5324         }
5325         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
5326         *tl++ = txdr_unsigned(20);              /* Max operations */
5327         *tl++ = txdr_unsigned(64);              /* Max slots */
5328         *tl = 0;                                /* No rdma ird */
5329
5330         /* Fill in back channel attributes. */
5331         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5332         *tl++ = 0;                              /* Header pad size */
5333         *tl++ = txdr_unsigned(10000);           /* Max request size */
5334         *tl++ = txdr_unsigned(10000);           /* Max response size */
5335         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
5336         *tl++ = txdr_unsigned(4);               /* Max operations */
5337         *tl++ = txdr_unsigned(NFSV4_CBSLOTS);   /* Max slots */
5338         *tl = 0;                                /* No rdma ird */
5339
5340         NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
5341         *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */
5342
5343         /* Allow AUTH_SYS callbacks as uid, gid == 0. */
5344         *tl++ = txdr_unsigned(1);               /* Auth_sys only */
5345         *tl++ = txdr_unsigned(AUTH_SYS);        /* AUTH_SYS type */
5346         *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
5347         *tl++ = 0;                              /* Null machine name */
5348         *tl++ = 0;                              /* Uid == 0 */
5349         *tl++ = 0;                              /* Gid == 0 */
5350         *tl = 0;                                /* No additional gids */
5351         nd->nd_flag |= ND_USEGSSNAME;
5352         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
5353             NFS_VER4, NULL, 1, NULL, NULL);
5354         if (error != 0)
5355                 return (error);
5356         if (nd->nd_repstat == 0) {
5357                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
5358                     2 * NFSX_UNSIGNED);
5359                 bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
5360                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
5361                 sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
5362                 crflags = fxdr_unsigned(uint32_t, *tl);
5363                 if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
5364                         NFSLOCKMNT(nmp);
5365                         nmp->nm_state |= NFSSTA_SESSPERSIST;
5366                         NFSUNLOCKMNT(nmp);
5367                 }
5368
5369                 /* Get the fore channel slot count. */
5370                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5371                 tl++;                   /* Skip the header pad size. */
5372
5373                 /* Make sure nm_wsize is small enough. */
5374                 maxval = fxdr_unsigned(uint32_t, *tl++);
5375                 while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
5376                         if (nmp->nm_wsize > 8096)
5377                                 nmp->nm_wsize /= 2;
5378                         else
5379                                 break;
5380                 }
5381                 sep->nfsess_maxreq = maxval;
5382
5383                 /* Make sure nm_rsize is small enough. */
5384                 maxval = fxdr_unsigned(uint32_t, *tl++);
5385                 while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
5386                         if (nmp->nm_rsize > 8096)
5387                                 nmp->nm_rsize /= 2;
5388                         else
5389                                 break;
5390                 }
5391                 sep->nfsess_maxresp = maxval;
5392
5393                 sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
5394                 tl++;
5395                 sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
5396                 NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
5397                 irdcnt = fxdr_unsigned(int, *tl);
5398                 if (irdcnt < 0 || irdcnt > 1) {
5399                         error = NFSERR_BADXDR;
5400                         goto nfsmout;
5401                 }
5402                 if (irdcnt > 0)
5403                         NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
5404
5405                 /* and the back channel slot count. */
5406                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5407                 tl += 5;
5408                 sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
5409                 NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
5410         }
5411         error = nd->nd_repstat;
5412 nfsmout:
5413         m_freem(nd->nd_mrep);
5414         return (error);
5415 }
5416
5417 /*
5418  * Do the NFSv4.1 Destroy Client.
5419  */
5420 int
5421 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
5422     struct ucred *cred, NFSPROC_T *p)
5423 {
5424         uint32_t *tl;
5425         struct nfsrv_descript nfsd;
5426         struct nfsrv_descript *nd = &nfsd;
5427         int error;
5428         struct nfsclsession *tsep;
5429
5430         nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
5431             0, NULL);
5432         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5433         tsep = nfsmnt_mdssession(nmp);
5434         *tl++ = tsep->nfsess_clientid.lval[0];
5435         *tl = tsep->nfsess_clientid.lval[1];
5436         nd->nd_flag |= ND_USEGSSNAME;
5437         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5438             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5439         if (error != 0)
5440                 return (error);
5441         error = nd->nd_repstat;
5442         m_freem(nd->nd_mrep);
5443         return (error);
5444 }
5445
5446 /*
5447  * Do the NFSv4.1 LayoutGet.
5448  */
5449 static int
5450 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
5451     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
5452     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
5453     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p)
5454 {
5455         struct nfsrv_descript nfsd, *nd = &nfsd;
5456         int error;
5457
5458         nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
5459             0, cred);
5460         nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
5461             layouttype, layoutlen, 0);
5462         nd->nd_flag |= ND_USEGSSNAME;
5463         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5464             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5465         NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
5466         if (error != 0)
5467                 return (error);
5468         if (nd->nd_repstat == 0)
5469                 error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep,
5470                     flhp);
5471         if (error == 0 && nd->nd_repstat != 0)
5472                 error = nd->nd_repstat;
5473         m_freem(nd->nd_mrep);
5474         return (error);
5475 }
5476
5477 /*
5478  * Do the NFSv4.1 Get Device Info.
5479  */
5480 int
5481 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
5482     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
5483     NFSPROC_T *p)
5484 {
5485         uint32_t cnt, *tl, vers, minorvers;
5486         struct nfsrv_descript nfsd;
5487         struct nfsrv_descript *nd = &nfsd;
5488         struct sockaddr_in sin, ssin;
5489         struct sockaddr_in6 sin6, ssin6;
5490         struct nfsclds *dsp = NULL, **dspp, **gotdspp;
5491         struct nfscldevinfo *ndi;
5492         int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j;
5493         int stripecnt;
5494         uint8_t stripeindex;
5495         sa_family_t af, safilled;
5496
5497         ssin.sin_port = 0;              /* To shut up compiler. */
5498         ssin.sin_addr.s_addr = 0;       /* ditto */
5499         *ndip = NULL;
5500         ndi = NULL;
5501         gotdspp = NULL;
5502         nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
5503             0, cred);
5504         NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5505         NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
5506         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5507         *tl++ = txdr_unsigned(layouttype);
5508         *tl++ = txdr_unsigned(100000);
5509         if (notifybitsp != NULL && *notifybitsp != 0) {
5510                 *tl = txdr_unsigned(1);         /* One word of bits. */
5511                 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5512                 *tl = txdr_unsigned(*notifybitsp);
5513         } else
5514                 *tl = txdr_unsigned(0);
5515         nd->nd_flag |= ND_USEGSSNAME;
5516         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5517             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5518         if (error != 0)
5519                 return (error);
5520         if (nd->nd_repstat == 0) {
5521                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5522                 if (layouttype != fxdr_unsigned(int, *tl))
5523                         printf("EEK! devinfo layout type not same!\n");
5524                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
5525                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5526                         stripecnt = fxdr_unsigned(int, *tl);
5527                         NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
5528                         if (stripecnt < 1 || stripecnt > 4096) {
5529                                 printf("pNFS File layout devinfo stripecnt %d:"
5530                                     " out of range\n", stripecnt);
5531                                 error = NFSERR_BADXDR;
5532                                 goto nfsmout;
5533                         }
5534                         NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
5535                             NFSX_UNSIGNED);
5536                         addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
5537                         NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
5538                         if (addrcnt < 1 || addrcnt > 128) {
5539                                 printf("NFS devinfo addrcnt %d: out of range\n",
5540                                     addrcnt);
5541                                 error = NFSERR_BADXDR;
5542                                 goto nfsmout;
5543                         }
5544
5545                         /*
5546                          * Now we know how many stripe indices and addresses, so
5547                          * we can allocate the structure the correct size.
5548                          */
5549                         i = (stripecnt * sizeof(uint8_t)) /
5550                             sizeof(struct nfsclds *) + 1;
5551                         NFSCL_DEBUG(4, "stripeindices=%d\n", i);
5552                         ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
5553                             sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
5554                             M_ZERO);
5555                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5556                             NFSX_V4DEVICEID);
5557                         ndi->nfsdi_refcnt = 0;
5558                         ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5559                         ndi->nfsdi_stripecnt = stripecnt;
5560                         ndi->nfsdi_addrcnt = addrcnt;
5561                         /* Fill in the stripe indices. */
5562                         for (i = 0; i < stripecnt; i++) {
5563                                 stripeindex = fxdr_unsigned(uint8_t, *tl++);
5564                                 NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5565                                 if (stripeindex >= addrcnt) {
5566                                         printf("pNFS File Layout devinfo"
5567                                             " stripeindex %d: too big\n",
5568                                             (int)stripeindex);
5569                                         error = NFSERR_BADXDR;
5570                                         goto nfsmout;
5571                                 }
5572                                 nfsfldi_setstripeindex(ndi, i, stripeindex);
5573                         }
5574                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
5575                         /* For Flex File, we only get one address list. */
5576                         ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5577                             M_NFSDEVINFO, M_WAITOK | M_ZERO);
5578                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5579                             NFSX_V4DEVICEID);
5580                         ndi->nfsdi_refcnt = 0;
5581                         ndi->nfsdi_flags = NFSDI_FLEXFILE;
5582                         addrcnt = ndi->nfsdi_addrcnt = 1;
5583                 }
5584
5585                 /* Now, dissect the server address(es). */
5586                 safilled = AF_UNSPEC;
5587                 for (i = 0; i < addrcnt; i++) {
5588                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5589                         cnt = fxdr_unsigned(uint32_t, *tl);
5590                         if (cnt == 0) {
5591                                 printf("NFS devinfo 0 len addrlist\n");
5592                                 error = NFSERR_BADXDR;
5593                                 goto nfsmout;
5594                         }
5595                         dspp = nfsfldi_addr(ndi, i);
5596                         safilled = AF_UNSPEC;
5597                         for (j = 0; j < cnt; j++) {
5598                                 error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5599                                     &isudp);
5600                                 if (error != 0 && error != EPERM) {
5601                                         error = NFSERR_BADXDR;
5602                                         goto nfsmout;
5603                                 }
5604                                 if (error == 0 && isudp == 0) {
5605                                         /*
5606                                          * The priority is:
5607                                          * - Same address family.
5608                                          * Save the address and dspp, so that
5609                                          * the connection can be done after
5610                                          * parsing is complete.
5611                                          */
5612                                         if (safilled == AF_UNSPEC ||
5613                                             (af == nmp->nm_nam->sa_family &&
5614                                              safilled != nmp->nm_nam->sa_family)
5615                                            ) {
5616                                                 if (af == AF_INET)
5617                                                         ssin = sin;
5618                                                 else
5619                                                         ssin6 = sin6;
5620                                                 safilled = af;
5621                                                 gotdspp = dspp;
5622                                         }
5623                                 }
5624                         }
5625                 }
5626
5627                 gotvers = NFS_VER4;     /* Default NFSv4.1 for File Layout. */
5628                 gotminor = NFSV41_MINORVERSION;
5629                 /* For Flex File, we will take one of the versions to use. */
5630                 if (layouttype == NFSLAYOUT_FLEXFILE) {
5631                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5632                         j = fxdr_unsigned(int, *tl);
5633                         if (j < 1 || j > NFSDEV_MAXVERS) {
5634                                 printf("pNFS: too many versions\n");
5635                                 error = NFSERR_BADXDR;
5636                                 goto nfsmout;
5637                         }
5638                         gotvers = 0;
5639                         gotminor = 0;
5640                         for (i = 0; i < j; i++) {
5641                                 NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5642                                 vers = fxdr_unsigned(uint32_t, *tl++);
5643                                 minorvers = fxdr_unsigned(uint32_t, *tl++);
5644                                 if (vers == NFS_VER3)
5645                                         minorvers = 0;
5646                                 if ((vers == NFS_VER4 && ((minorvers ==
5647                                     NFSV41_MINORVERSION && gotminor == 0) ||
5648                                     minorvers == NFSV42_MINORVERSION)) ||
5649                                     (vers == NFS_VER3 && gotvers == 0)) {
5650                                         gotvers = vers;
5651                                         gotminor = minorvers;
5652                                         /* We'll take this one. */
5653                                         ndi->nfsdi_versindex = i;
5654                                         ndi->nfsdi_vers = vers;
5655                                         ndi->nfsdi_minorvers = minorvers;
5656                                         ndi->nfsdi_rsize = fxdr_unsigned(
5657                                             uint32_t, *tl++);
5658                                         ndi->nfsdi_wsize = fxdr_unsigned(
5659                                             uint32_t, *tl++);
5660                                         if (*tl == newnfs_true)
5661                                                 ndi->nfsdi_flags |=
5662                                                     NFSDI_TIGHTCOUPLED;
5663                                         else
5664                                                 ndi->nfsdi_flags &=
5665                                                     ~NFSDI_TIGHTCOUPLED;
5666                                 }
5667                         }
5668                         if (gotvers == 0) {
5669                                 printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n");
5670                                 error = NFSERR_BADXDR;
5671                                 goto nfsmout;
5672                         }
5673                 }
5674
5675                 /* And the notify bits. */
5676                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5677                 bitcnt = fxdr_unsigned(int, *tl);
5678                 if (bitcnt > 0) {
5679                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5680                         if (notifybitsp != NULL)
5681                                 *notifybitsp =
5682                                     fxdr_unsigned(uint32_t, *tl);
5683                 }
5684                 if (safilled != AF_UNSPEC) {
5685                         KASSERT(ndi != NULL, ("ndi is NULL"));
5686                         *ndip = ndi;
5687                 } else
5688                         error = EPERM;
5689                 if (error == 0) {
5690                         /*
5691                          * Now we can do a TCP connection for the correct
5692                          * NFS version and IP address.
5693                          */
5694                         error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5695                             gotvers, gotminor, &dsp, p);
5696                 }
5697                 if (error == 0) {
5698                         KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5699                         *gotdspp = dsp;
5700                 }
5701         }
5702         if (nd->nd_repstat != 0 && error == 0)
5703                 error = nd->nd_repstat;
5704 nfsmout:
5705         if (error != 0 && ndi != NULL)
5706                 nfscl_freedevinfo(ndi);
5707         m_freem(nd->nd_mrep);
5708         return (error);
5709 }
5710
5711 /*
5712  * Do the NFSv4.1 LayoutCommit.
5713  */
5714 int
5715 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5716     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5717     int layouttype, struct ucred *cred, NFSPROC_T *p)
5718 {
5719         uint32_t *tl;
5720         struct nfsrv_descript nfsd, *nd = &nfsd;
5721         int error;
5722
5723         nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5724             0, 0, cred);
5725         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5726             NFSX_STATEID);
5727         txdr_hyper(off, tl);
5728         tl += 2;
5729         txdr_hyper(len, tl);
5730         tl += 2;
5731         if (reclaim != 0)
5732                 *tl++ = newnfs_true;
5733         else
5734                 *tl++ = newnfs_false;
5735         *tl++ = txdr_unsigned(stateidp->seqid);
5736         *tl++ = stateidp->other[0];
5737         *tl++ = stateidp->other[1];
5738         *tl++ = stateidp->other[2];
5739         *tl++ = newnfs_true;
5740         if (lastbyte < off)
5741                 lastbyte = off;
5742         else if (lastbyte >= (off + len))
5743                 lastbyte = off + len - 1;
5744         txdr_hyper(lastbyte, tl);
5745         tl += 2;
5746         *tl++ = newnfs_false;
5747         *tl++ = txdr_unsigned(layouttype);
5748         /* All supported layouts are 0 length. */
5749         *tl = txdr_unsigned(0);
5750         nd->nd_flag |= ND_USEGSSNAME;
5751         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5752             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5753         if (error != 0)
5754                 return (error);
5755         error = nd->nd_repstat;
5756         m_freem(nd->nd_mrep);
5757         return (error);
5758 }
5759
5760 /*
5761  * Do the NFSv4.1 LayoutReturn.
5762  */
5763 int
5764 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5765     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5766     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5767     uint32_t stat, uint32_t op, char *devid)
5768 {
5769         uint32_t *tl;
5770         struct nfsrv_descript nfsd, *nd = &nfsd;
5771         uint64_t tu64;
5772         int error;
5773
5774         nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5775             0, 0, cred);
5776         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5777         if (reclaim != 0)
5778                 *tl++ = newnfs_true;
5779         else
5780                 *tl++ = newnfs_false;
5781         *tl++ = txdr_unsigned(layouttype);
5782         *tl++ = txdr_unsigned(iomode);
5783         *tl = txdr_unsigned(layoutreturn);
5784         if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5785                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5786                     NFSX_UNSIGNED);
5787                 txdr_hyper(offset, tl);
5788                 tl += 2;
5789                 txdr_hyper(len, tl);
5790                 tl += 2;
5791                 NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5792                 *tl++ = txdr_unsigned(stateidp->seqid);
5793                 *tl++ = stateidp->other[0];
5794                 *tl++ = stateidp->other[1];
5795                 *tl++ = stateidp->other[2];
5796                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5797                         *tl = txdr_unsigned(0);
5798                 else if (layouttype == NFSLAYOUT_FLEXFILE) {
5799                         if (stat != 0) {
5800                                 *tl = txdr_unsigned(2 * NFSX_HYPER +
5801                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5802                                     NFSX_UNSIGNED);
5803                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER +
5804                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5805                                     NFSX_UNSIGNED);
5806                                 *tl++ = txdr_unsigned(1);       /* One error. */
5807                                 tu64 = 0;                       /* Offset. */
5808                                 txdr_hyper(tu64, tl); tl += 2;
5809                                 tu64 = UINT64_MAX;              /* Length. */
5810                                 txdr_hyper(tu64, tl); tl += 2;
5811                                 NFSBCOPY(stateidp, tl, NFSX_STATEID);
5812                                 tl += (NFSX_STATEID / NFSX_UNSIGNED);
5813                                 *tl++ = txdr_unsigned(1);       /* One error. */
5814                                 NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5815                                 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5816                                 *tl++ = txdr_unsigned(stat);
5817                                 *tl++ = txdr_unsigned(op);
5818                         } else {
5819                                 *tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5820                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5821                                 /* No ioerrs. */
5822                                 *tl++ = 0;
5823                         }
5824                         *tl = 0;        /* No stats yet. */
5825                 }
5826         }
5827         nd->nd_flag |= ND_USEGSSNAME;
5828         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5829             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5830         if (error != 0)
5831                 return (error);
5832         if (nd->nd_repstat == 0) {
5833                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5834                 if (*tl != 0) {
5835                         NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5836                         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5837                         stateidp->other[0] = *tl++;
5838                         stateidp->other[1] = *tl++;
5839                         stateidp->other[2] = *tl;
5840                 }
5841         } else
5842                 error = nd->nd_repstat;
5843 nfsmout:
5844         m_freem(nd->nd_mrep);
5845         return (error);
5846 }
5847
5848 /*
5849  * Do the NFSv4.2 LayoutError.
5850  */
5851 static int
5852 nfsrpc_layouterror(struct nfsmount *nmp, uint8_t *fh, int fhlen, uint64_t offset,
5853     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5854     uint32_t stat, uint32_t op, char *devid)
5855 {
5856         uint32_t *tl;
5857         struct nfsrv_descript nfsd, *nd = &nfsd;
5858         int error;
5859
5860         nfscl_reqstart(nd, NFSPROC_LAYOUTERROR, nmp, fh, fhlen, NULL, NULL,
5861             0, 0, cred);
5862         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5863             NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5864         txdr_hyper(offset, tl); tl += 2;
5865         txdr_hyper(len, tl); tl += 2;
5866         *tl++ = txdr_unsigned(stateidp->seqid);
5867         *tl++ = stateidp->other[0];
5868         *tl++ = stateidp->other[1];
5869         *tl++ = stateidp->other[2];
5870         *tl++ = txdr_unsigned(1);
5871         NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5872         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5873         *tl++ = txdr_unsigned(stat);
5874         *tl = txdr_unsigned(op);
5875         nd->nd_flag |= ND_USEGSSNAME;
5876         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5877             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5878         if (error != 0)
5879                 return (error);
5880         if (nd->nd_repstat != 0)
5881                 error = nd->nd_repstat;
5882         m_freem(nd->nd_mrep);
5883         return (error);
5884 }
5885
5886 /*
5887  * Acquire a layout and devinfo, if possible. The caller must have acquired
5888  * a reference count on the nfsclclient structure before calling this.
5889  * Return the layout in lypp with a reference count on it, if successful.
5890  */
5891 static int
5892 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5893     int iomode, uint32_t rw, uint32_t *notifybitsp, nfsv4stateid_t *stateidp,
5894     uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5895 {
5896         struct nfscllayout *lyp;
5897         struct nfsclflayout *flp;
5898         struct nfsclflayouthead flh;
5899         int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5900         nfsv4stateid_t stateid;
5901         struct nfsclsession *tsep;
5902
5903         *lypp = NULL;
5904         if (NFSHASFLEXFILE(nmp))
5905                 layouttype = NFSLAYOUT_FLEXFILE;
5906         else
5907                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5908         /*
5909          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5910          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5911          * flp == NULL.
5912          */
5913         lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5914             off, rw, &flp, &recalled);
5915         islocked = 0;
5916         if (lyp == NULL || flp == NULL) {
5917                 if (recalled != 0)
5918                         return (EIO);
5919                 LIST_INIT(&flh);
5920                 tsep = nfsmnt_mdssession(nmp);
5921                 layoutlen = tsep->nfsess_maxcache -
5922                     (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5923                 if (lyp == NULL) {
5924                         stateid.seqid = 0;
5925                         stateid.other[0] = stateidp->other[0];
5926                         stateid.other[1] = stateidp->other[1];
5927                         stateid.other[2] = stateidp->other[2];
5928                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5929                             nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5930                             (uint64_t)0, layouttype, layoutlen, &stateid,
5931                             &retonclose, &flh, cred, p);
5932                 } else {
5933                         islocked = 1;
5934                         stateid.seqid = lyp->nfsly_stateid.seqid;
5935                         stateid.other[0] = lyp->nfsly_stateid.other[0];
5936                         stateid.other[1] = lyp->nfsly_stateid.other[1];
5937                         stateid.other[2] = lyp->nfsly_stateid.other[2];
5938                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5939                             nfhp->nfh_len, iomode, off, UINT64_MAX,
5940                             (uint64_t)0, layouttype, layoutlen, &stateid,
5941                             &retonclose, &flh, cred, p);
5942                 }
5943                 error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5944                     nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5945                     &flh, layouttype, error, NULL, cred, p);
5946                 if (error == 0)
5947                         *lypp = lyp;
5948                 else if (islocked != 0)
5949                         nfscl_rellayout(lyp, 1);
5950         } else
5951                 *lypp = lyp;
5952         return (error);
5953 }
5954
5955 /*
5956  * Do a TCP connection plus exchange id and create session.
5957  * If successful, a "struct nfsclds" is linked into the list for the
5958  * mount point and a pointer to it is returned.
5959  */
5960 static int
5961 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5962     struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers,
5963     struct nfsclds **dspp, NFSPROC_T *p)
5964 {
5965         struct sockaddr_in *msad, *sad;
5966         struct sockaddr_in6 *msad6, *sad6;
5967         struct nfsclclient *clp;
5968         struct nfssockreq *nrp;
5969         struct nfsclds *dsp, *tdsp;
5970         int error, firsttry;
5971         enum nfsclds_state retv;
5972         uint32_t sequenceid = 0;
5973
5974         KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5975             ("nfsrpc_fillsa: NULL nr_cred"));
5976         NFSLOCKCLSTATE();
5977         clp = nmp->nm_clp;
5978         NFSUNLOCKCLSTATE();
5979         if (clp == NULL)
5980                 return (EPERM);
5981         if (af == AF_INET) {
5982                 NFSLOCKMNT(nmp);
5983                 /*
5984                  * Check to see if we already have a session for this
5985                  * address that is usable for a DS.
5986                  * Note that the MDS's address is in a different place
5987                  * than the sessions already acquired for DS's.
5988                  */
5989                 msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5990                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5991                 while (tdsp != NULL) {
5992                         if (msad != NULL && msad->sin_family == AF_INET &&
5993                             sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5994                             sin->sin_port == msad->sin_port &&
5995                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5996                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5997                                 *dspp = tdsp;
5998                                 NFSUNLOCKMNT(nmp);
5999                                 NFSCL_DEBUG(4, "fnd same addr\n");
6000                                 return (0);
6001                         }
6002                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
6003                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
6004                                 msad = (struct sockaddr_in *)
6005                                     tdsp->nfsclds_sockp->nr_nam;
6006                         else
6007                                 msad = NULL;
6008                 }
6009                 NFSUNLOCKMNT(nmp);
6010
6011                 /* No IP address match, so look for new/trunked one. */
6012                 sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
6013                 sad->sin_len = sizeof(*sad);
6014                 sad->sin_family = AF_INET;
6015                 sad->sin_port = sin->sin_port;
6016                 sad->sin_addr.s_addr = sin->sin_addr.s_addr;
6017                 if (NFSHASPNFS(nmp) && NFSHASKERB(nmp)) {
6018                         /* For pNFS, a separate server principal is needed. */
6019                         nrp = malloc(sizeof(*nrp) + NI_MAXSERV + NI_MAXHOST,
6020                             M_NFSSOCKREQ, M_WAITOK | M_ZERO);
6021                         /*
6022                          * Use the latter part of nr_srvprinc as a temporary
6023                          * buffer for the IP address.
6024                          */
6025                         inet_ntoa_r(sad->sin_addr,
6026                             &nrp->nr_srvprinc[NI_MAXSERV]);
6027                         NFSCL_DEBUG(1, "nfsrpc_fillsa: DS IP=%s\n",
6028                             &nrp->nr_srvprinc[NI_MAXSERV]);
6029                         if (!rpc_gss_ip_to_srv_principal_call(
6030                             &nrp->nr_srvprinc[NI_MAXSERV], "nfs",
6031                             nrp->nr_srvprinc))
6032                                 nrp->nr_srvprinc[0] = '\0';
6033                         NFSCL_DEBUG(1, "nfsrpc_fillsa: srv principal=%s\n",
6034                             nrp->nr_srvprinc);
6035                 } else
6036                         nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ,
6037                             M_WAITOK | M_ZERO);
6038                 nrp->nr_nam = (struct sockaddr *)sad;
6039         } else if (af == AF_INET6) {
6040                 NFSLOCKMNT(nmp);
6041                 /*
6042                  * Check to see if we already have a session for this
6043                  * address that is usable for a DS.
6044                  * Note that the MDS's address is in a different place
6045                  * than the sessions already acquired for DS's.
6046                  */
6047                 msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
6048                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
6049                 while (tdsp != NULL) {
6050                         if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
6051                             IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6052                             &msad6->sin6_addr) &&
6053                             sin6->sin6_port == msad6->sin6_port &&
6054                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
6055                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
6056                                 *dspp = tdsp;
6057                                 NFSUNLOCKMNT(nmp);
6058                                 return (0);
6059                         }
6060                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
6061                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
6062                                 msad6 = (struct sockaddr_in6 *)
6063                                     tdsp->nfsclds_sockp->nr_nam;
6064                         else
6065                                 msad6 = NULL;
6066                 }
6067                 NFSUNLOCKMNT(nmp);
6068
6069                 /* No IP address match, so look for new/trunked one. */
6070                 sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
6071                 sad6->sin6_len = sizeof(*sad6);
6072                 sad6->sin6_family = AF_INET6;
6073                 sad6->sin6_port = sin6->sin6_port;
6074                 NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
6075                     sizeof(struct in6_addr));
6076                 if (NFSHASPNFS(nmp) && NFSHASKERB(nmp)) {
6077                         /* For pNFS, a separate server principal is needed. */
6078                         nrp = malloc(sizeof(*nrp) + NI_MAXSERV + NI_MAXHOST,
6079                             M_NFSSOCKREQ, M_WAITOK | M_ZERO);
6080                         /*
6081                          * Use the latter part of nr_srvprinc as a temporary
6082                          * buffer for the IP address.
6083                          */
6084                         inet_ntop(AF_INET6, &sad6->sin6_addr,
6085                             &nrp->nr_srvprinc[NI_MAXSERV], NI_MAXHOST);
6086                         NFSCL_DEBUG(1, "nfsrpc_fillsa: DS IP=%s\n",
6087                             &nrp->nr_srvprinc[NI_MAXSERV]);
6088                         if (!rpc_gss_ip_to_srv_principal_call(
6089                             &nrp->nr_srvprinc[NI_MAXSERV], "nfs",
6090                             nrp->nr_srvprinc))
6091                                 nrp->nr_srvprinc[0] = '\0';
6092                         NFSCL_DEBUG(1, "nfsrpc_fillsa: srv principal=%s\n",
6093                             nrp->nr_srvprinc);
6094                 } else
6095                         nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ,
6096                             M_WAITOK | M_ZERO);
6097                 nrp->nr_nam = (struct sockaddr *)sad6;
6098         } else
6099                 return (EPERM);
6100
6101         nrp->nr_sotype = SOCK_STREAM;
6102         mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
6103         nrp->nr_prog = NFS_PROG;
6104         nrp->nr_vers = vers;
6105
6106         /*
6107          * Use the credentials that were used for the mount, which are
6108          * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
6109          * Ref. counting the credentials with crhold() is probably not
6110          * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
6111          * unmount, but I did it anyhow.
6112          */
6113         nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
6114         error = newnfs_connect(nmp, nrp, NULL, p, 0, false, &nrp->nr_client);
6115         NFSCL_DEBUG(3, "DS connect=%d\n", error);
6116
6117         dsp = NULL;
6118         /* Now, do the exchangeid and create session. */
6119         if (error == 0) {
6120                 if (vers == NFS_VER4) {
6121                         firsttry = 0;
6122                         do {
6123                                 error = nfsrpc_exchangeid(nmp, clp, nrp, 
6124                                     minorvers, NFSV4EXCH_USEPNFSDS, &dsp,
6125                                     nrp->nr_cred, p);
6126                                 NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
6127                                 if (error == NFSERR_MINORVERMISMATCH)
6128                                         minorvers = NFSV42_MINORVERSION;
6129                         } while (error == NFSERR_MINORVERMISMATCH &&
6130                             firsttry++ == 0);
6131                         if (error != 0)
6132                                 newnfs_disconnect(NULL, nrp);
6133                 } else {
6134                         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
6135                             M_WAITOK | M_ZERO);
6136                         dsp->nfsclds_flags |= NFSCLDS_DS;
6137                         dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
6138                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
6139                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
6140                             NULL, MTX_DEF);
6141                 }
6142         }
6143         if (error == 0) {
6144                 dsp->nfsclds_sockp = nrp;
6145                 if (vers == NFS_VER4) {
6146                         NFSLOCKMNT(nmp);
6147                         retv = nfscl_getsameserver(nmp, dsp, &tdsp,
6148                             &sequenceid);
6149                         NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
6150                         if (retv == NFSDSP_USETHISSESSION &&
6151                             nfscl_dssameconn != 0) {
6152                                 NFSLOCKDS(tdsp);
6153                                 tdsp->nfsclds_flags |= NFSCLDS_SAMECONN;
6154                                 NFSUNLOCKDS(tdsp);
6155                                 NFSUNLOCKMNT(nmp);
6156                                 /*
6157                                  * If there is already a session for this
6158                                  * server, use it.
6159                                  */
6160                                 newnfs_disconnect(NULL, nrp);
6161                                 nfscl_freenfsclds(dsp);
6162                                 *dspp = tdsp;
6163                                 return (0);
6164                         }
6165                         if (retv == NFSDSP_NOTFOUND)
6166                                 sequenceid =
6167                                     dsp->nfsclds_sess.nfsess_sequenceid;
6168                         NFSUNLOCKMNT(nmp);
6169                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
6170                             nrp, dsp, sequenceid, 0, nrp->nr_cred, p);
6171                         NFSCL_DEBUG(3, "DS createsess=%d\n", error);
6172                 }
6173         } else {
6174                 NFSFREECRED(nrp->nr_cred);
6175                 NFSFREEMUTEX(&nrp->nr_mtx);
6176                 free(nrp->nr_nam, M_SONAME);
6177                 free(nrp, M_NFSSOCKREQ);
6178         }
6179         if (error == 0) {
6180                 NFSCL_DEBUG(3, "add DS session\n");
6181                 /*
6182                  * Put it at the end of the list. That way the list
6183                  * is ordered by when the entry was added. This matters
6184                  * since the one done first is the one that should be
6185                  * used for sequencid'ing any subsequent create sessions.
6186                  */
6187                 NFSLOCKMNT(nmp);
6188                 TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
6189                 NFSUNLOCKMNT(nmp);
6190                 *dspp = dsp;
6191         } else if (dsp != NULL) {
6192                 newnfs_disconnect(NULL, nrp);
6193                 nfscl_freenfsclds(dsp);
6194         }
6195         return (error);
6196 }
6197
6198 /*
6199  * Do the NFSv4.1 Reclaim Complete.
6200  */
6201 int
6202 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
6203 {
6204         uint32_t *tl;
6205         struct nfsrv_descript nfsd;
6206         struct nfsrv_descript *nd = &nfsd;
6207         int error;
6208
6209         nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
6210             0, cred);
6211         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
6212         *tl = newnfs_false;
6213         nd->nd_flag |= ND_USEGSSNAME;
6214         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
6215             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6216         if (error != 0)
6217                 return (error);
6218         error = nd->nd_repstat;
6219         m_freem(nd->nd_mrep);
6220         return (error);
6221 }
6222
6223 /*
6224  * Initialize the slot tables for a session.
6225  */
6226 static void
6227 nfscl_initsessionslots(struct nfsclsession *sep)
6228 {
6229         int i;
6230
6231         for (i = 0; i < NFSV4_CBSLOTS; i++) {
6232                 if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
6233                         m_freem(sep->nfsess_cbslots[i].nfssl_reply);
6234                 NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
6235         }
6236         for (i = 0; i < 64; i++)
6237                 sep->nfsess_slotseq[i] = 0;
6238         sep->nfsess_slots = 0;
6239         sep->nfsess_badslots = 0;
6240 }
6241
6242 /*
6243  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
6244  */
6245 int
6246 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6247     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
6248 {
6249         struct nfsnode *np = VTONFS(vp);
6250         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6251         struct nfscllayout *layp;
6252         struct nfscldevinfo *dip;
6253         struct nfsclflayout *rflp;
6254         struct mbuf *m, *m2;
6255         struct nfsclwritedsdorpc *drpc, *tdrpc;
6256         nfsv4stateid_t stateid;
6257         struct ucred *newcred;
6258         uint64_t lastbyte, len, off, oresid, xfer;
6259         int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
6260         void *lckp;
6261         uint8_t *dev;
6262         void *iovbase = NULL;
6263         size_t iovlen = 0;
6264         off_t offs = 0;
6265         ssize_t resid = 0;
6266         uint32_t op;
6267
6268         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
6269             (np->n_flag & NNOLAYOUT) != 0)
6270                 return (EIO);
6271         /* Now, get a reference cnt on the clientid for this mount. */
6272         if (nfscl_getref(nmp) == 0)
6273                 return (EIO);
6274
6275         /* Find an appropriate stateid. */
6276         newcred = NFSNEWCRED(cred);
6277         error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
6278             rwaccess, 1, newcred, p, &stateid, &lckp);
6279         if (error != 0) {
6280                 NFSFREECRED(newcred);
6281                 nfscl_relref(nmp);
6282                 return (error);
6283         }
6284         /* Search for a layout for this file. */
6285         off = uiop->uio_offset;
6286         layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
6287             np->n_fhp->nfh_len, off, rwaccess, &rflp, &recalled);
6288         if (layp == NULL || rflp == NULL) {
6289                 if (recalled != 0) {
6290                         NFSFREECRED(newcred);
6291                         if (lckp != NULL)
6292                                 nfscl_lockderef(lckp);
6293                         nfscl_relref(nmp);
6294                         return (EIO);
6295                 }
6296                 if (layp != NULL) {
6297                         nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
6298                         layp = NULL;
6299                 }
6300                 /* Try and get a Layout, if it is supported. */
6301                 if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
6302                     (np->n_flag & NWRITEOPENED) != 0)
6303                         iolaymode = NFSLAYOUTIOMODE_RW;
6304                 else
6305                         iolaymode = NFSLAYOUTIOMODE_READ;
6306                 error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
6307                     rwaccess, NULL, &stateid, off, &layp, newcred, p);
6308                 if (error != 0) {
6309                         NFSLOCKNODE(np);
6310                         np->n_flag |= NNOLAYOUT;
6311                         NFSUNLOCKNODE(np);
6312                         if (lckp != NULL)
6313                                 nfscl_lockderef(lckp);
6314                         NFSFREECRED(newcred);
6315                         if (layp != NULL)
6316                                 nfscl_rellayout(layp, 0);
6317                         nfscl_relref(nmp);
6318                         return (error);
6319                 }
6320         }
6321
6322         /*
6323          * Loop around finding a layout that works for the first part of
6324          * this I/O operation, and then call the function that actually
6325          * does the RPC.
6326          */
6327         eof = 0;
6328         len = (uint64_t)uiop->uio_resid;
6329         while (len > 0 && error == 0 && eof == 0) {
6330                 off = uiop->uio_offset;
6331                 error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
6332                 if (error == 0) {
6333                         oresid = xfer = (uint64_t)uiop->uio_resid;
6334                         if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
6335                                 xfer = rflp->nfsfl_end - rflp->nfsfl_off;
6336                         /*
6337                          * For Flex File layout with mirrored DSs, select one
6338                          * of them at random for reads. For writes and commits,
6339                          * do all mirrors.
6340                          */
6341                         m = NULL;
6342                         tdrpc = drpc = NULL;
6343                         firstmirror = 0;
6344                         mirrorcnt = 1;
6345                         if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
6346                             (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
6347                                 if (rwaccess == NFSV4OPEN_ACCESSREAD) {
6348                                         firstmirror = arc4random() % mirrorcnt;
6349                                         mirrorcnt = firstmirror + 1;
6350                                 } else {
6351                                         if (docommit == 0) {
6352                                                 /*
6353                                                  * Save values, so uiop can be
6354                                                  * rolled back upon a write
6355                                                  * error.
6356                                                  */
6357                                                 offs = uiop->uio_offset;
6358                                                 resid = uiop->uio_resid;
6359                                                 iovbase =
6360                                                     uiop->uio_iov->iov_base;
6361                                                 iovlen = uiop->uio_iov->iov_len;
6362                                                 m = nfsm_uiombuflist(uiop, len,
6363                                                     0);
6364                                                 if (m == NULL) {
6365                                                         error = EFAULT;
6366                                                         break;
6367                                                 }
6368                                         }
6369                                         tdrpc = drpc = malloc(sizeof(*drpc) *
6370                                             (mirrorcnt - 1), M_TEMP, M_WAITOK |
6371                                             M_ZERO);
6372                                 }
6373                         }
6374                         for (i = firstmirror; i < mirrorcnt && error == 0; i++){
6375                                 m2 = NULL;
6376                                 if (m != NULL && i < mirrorcnt - 1)
6377                                         m2 = m_copym(m, 0, M_COPYALL, M_WAITOK);
6378                                 else {
6379                                         m2 = m;
6380                                         m = NULL;
6381                                 }
6382                                 if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) {
6383                                         dev = rflp->nfsfl_ffm[i].dev;
6384                                         dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6385                                             rflp->nfsfl_ffm[i].devp);
6386                                 } else {
6387                                         dev = rflp->nfsfl_dev;
6388                                         dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6389                                             rflp->nfsfl_devp);
6390                                 }
6391                                 if (dip != NULL) {
6392                                         if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
6393                                             != 0)
6394                                                 error = nfscl_dofflayoutio(vp,
6395                                                     uiop, iomode, must_commit,
6396                                                     &eof, &stateid, rwaccess,
6397                                                     dip, layp, rflp, off, xfer,
6398                                                     i, docommit, m2, tdrpc,
6399                                                     newcred, p);
6400                                         else
6401                                                 error = nfscl_doflayoutio(vp,
6402                                                     uiop, iomode, must_commit,
6403                                                     &eof, &stateid, rwaccess,
6404                                                     dip, layp, rflp, off, xfer,
6405                                                     docommit, newcred, p);
6406                                         nfscl_reldevinfo(dip);
6407                                 } else {
6408                                         if (m2 != NULL)
6409                                                 m_freem(m2);
6410                                         error = EIO;
6411                                 }
6412                                 tdrpc++;
6413                         }
6414                         if (m != NULL)
6415                                 m_freem(m);
6416                         tdrpc = drpc;
6417                         timo = hz / 50;         /* Wait for 20msec. */
6418                         if (timo < 1)
6419                                 timo = 1;
6420                         for (i = firstmirror; i < mirrorcnt - 1 &&
6421                             tdrpc != NULL; i++, tdrpc++) {
6422                                 /*
6423                                  * For the unused drpc entries, both inprog and
6424                                  * err == 0, so this loop won't break.
6425                                  */
6426                                 while (tdrpc->inprog != 0 && tdrpc->done == 0)
6427                                         tsleep(&tdrpc->tsk, PVFS, "clrpcio",
6428                                             timo);
6429                                 if (error == 0 && tdrpc->err != 0)
6430                                         error = tdrpc->err;
6431                                 if (rwaccess != NFSV4OPEN_ACCESSREAD &&
6432                                     docommit == 0 && *must_commit == 0 &&
6433                                     tdrpc->must_commit == 1)
6434                                         *must_commit = 1;
6435                         }
6436                         free(drpc, M_TEMP);
6437                         if (error == 0) {
6438                                 if (mirrorcnt > 1 && rwaccess ==
6439                                     NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6440                                         NFSLOCKCLSTATE();
6441                                         layp->nfsly_flags |= NFSLY_WRITTEN;
6442                                         NFSUNLOCKCLSTATE();
6443                                 }
6444                                 lastbyte = off + xfer - 1;
6445                                 NFSLOCKCLSTATE();
6446                                 if (lastbyte > layp->nfsly_lastbyte)
6447                                         layp->nfsly_lastbyte = lastbyte;
6448                                 NFSUNLOCKCLSTATE();
6449                         } else if (error == NFSERR_OPENMODE &&
6450                             rwaccess == NFSV4OPEN_ACCESSREAD) {
6451                                 NFSLOCKMNT(nmp);
6452                                 nmp->nm_state |= NFSSTA_OPENMODE;
6453                                 NFSUNLOCKMNT(nmp);
6454                         } else if ((error == NFSERR_NOSPC ||
6455                             error == NFSERR_IO || error == NFSERR_NXIO) &&
6456                             nmp->nm_minorvers == NFSV42_MINORVERSION) {
6457                                 if (docommit != 0)
6458                                         op = NFSV4OP_COMMIT;
6459                                 else if (rwaccess == NFSV4OPEN_ACCESSREAD)
6460                                         op = NFSV4OP_READ;
6461                                 else
6462                                         op = NFSV4OP_WRITE;
6463                                 nfsrpc_layouterror(nmp, np->n_fhp->nfh_fh,
6464                                     np->n_fhp->nfh_len, off, xfer,
6465                                     &layp->nfsly_stateid, newcred, p, error, op,
6466                                     dip->nfsdi_deviceid);
6467                                 error = EIO;
6468                         } else
6469                                 error = EIO;
6470                         if (error == 0)
6471                                 len -= (oresid - (uint64_t)uiop->uio_resid);
6472                         else if (mirrorcnt > 1 && rwaccess ==
6473                             NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6474                                 /*
6475                                  * In case the rpc gets retried, roll the
6476                                  * uio fields changed by nfsm_uiombuflist()
6477                                  * back.
6478                                  */
6479                                 uiop->uio_offset = offs;
6480                                 uiop->uio_resid = resid;
6481                                 uiop->uio_iov->iov_base = iovbase;
6482                                 uiop->uio_iov->iov_len = iovlen;
6483                         }
6484                 }
6485         }
6486         if (lckp != NULL)
6487                 nfscl_lockderef(lckp);
6488         NFSFREECRED(newcred);
6489         nfscl_rellayout(layp, 0);
6490         nfscl_relref(nmp);
6491         return (error);
6492 }
6493
6494 /*
6495  * Find a file layout that will handle the first bytes of the requested
6496  * range and return the information from it needed to the I/O operation.
6497  */
6498 int
6499 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
6500     struct nfsclflayout **retflpp)
6501 {
6502         struct nfsclflayout *flp, *nflp, *rflp;
6503         uint32_t rw;
6504
6505         rflp = NULL;
6506         rw = rwaccess;
6507         /* For reading, do the Read list first and then the Write list. */
6508         do {
6509                 if (rw == NFSV4OPEN_ACCESSREAD)
6510                         flp = LIST_FIRST(&lyp->nfsly_flayread);
6511                 else
6512                         flp = LIST_FIRST(&lyp->nfsly_flayrw);
6513                 while (flp != NULL) {
6514                         nflp = LIST_NEXT(flp, nfsfl_list);
6515                         if (flp->nfsfl_off > off)
6516                                 break;
6517                         if (flp->nfsfl_end > off &&
6518                             (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
6519                                 rflp = flp;
6520                         flp = nflp;
6521                 }
6522                 if (rw == NFSV4OPEN_ACCESSREAD)
6523                         rw = NFSV4OPEN_ACCESSWRITE;
6524                 else
6525                         rw = 0;
6526         } while (rw != 0);
6527         if (rflp != NULL) {
6528                 /* This one covers the most bytes starting at off. */
6529                 *retflpp = rflp;
6530                 return (0);
6531         }
6532         return (EIO);
6533 }
6534
6535 /*
6536  * Do I/O using an NFSv4.1 or NFSv4.2 file layout.
6537  */
6538 static int
6539 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6540     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6541     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6542     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
6543 {
6544         uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
6545         int commit_thru_mds, error, stripe_index, stripe_pos, minorvers;
6546         struct nfsnode *np;
6547         struct nfsfh *fhp;
6548         struct nfsclds **dspp;
6549
6550         np = VTONFS(vp);
6551         rel_off = off - flp->nfsfl_patoff;
6552         stripe_unit_size = flp->nfsfl_util & NFSFLAYUTIL_STRIPE_MASK;
6553         stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
6554             dp->nfsdi_stripecnt;
6555         transfer = stripe_unit_size - (rel_off % stripe_unit_size);
6556         error = 0;
6557
6558         /* Loop around, doing I/O for each stripe unit. */
6559         while (len > 0 && error == 0) {
6560                 stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
6561                 dspp = nfsfldi_addr(dp, stripe_index);
6562                 if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0)
6563                         minorvers = NFSV42_MINORVERSION;
6564                 else
6565                         minorvers = NFSV41_MINORVERSION;
6566                 if (len > transfer && docommit == 0)
6567                         xfer = transfer;
6568                 else
6569                         xfer = len;
6570                 if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
6571                         /* Dense layout. */
6572                         if (stripe_pos >= flp->nfsfl_fhcnt)
6573                                 return (EIO);
6574                         fhp = flp->nfsfl_fh[stripe_pos];
6575                         io_off = (rel_off / (stripe_unit_size *
6576                             dp->nfsdi_stripecnt)) * stripe_unit_size +
6577                             rel_off % stripe_unit_size;
6578                 } else {
6579                         /* Sparse layout. */
6580                         if (flp->nfsfl_fhcnt > 1) {
6581                                 if (stripe_index >= flp->nfsfl_fhcnt)
6582                                         return (EIO);
6583                                 fhp = flp->nfsfl_fh[stripe_index];
6584                         } else if (flp->nfsfl_fhcnt == 1)
6585                                 fhp = flp->nfsfl_fh[0];
6586                         else
6587                                 fhp = np->n_fhp;
6588                         io_off = off;
6589                 }
6590                 if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
6591                         commit_thru_mds = 1;
6592                         if (docommit != 0)
6593                                 error = EIO;
6594                 } else {
6595                         commit_thru_mds = 0;
6596                         NFSLOCKNODE(np);
6597                         np->n_flag |= NDSCOMMIT;
6598                         NFSUNLOCKNODE(np);
6599                 }
6600                 if (docommit != 0) {
6601                         if (error == 0)
6602                                 error = nfsrpc_commitds(vp, io_off, xfer,
6603                                     *dspp, fhp, NFS_VER4, minorvers, cred, p);
6604                         if (error == 0) {
6605                                 /*
6606                                  * Set both eof and uio_resid = 0 to end any
6607                                  * loops.
6608                                  */
6609                                 *eofp = 1;
6610                                 uiop->uio_resid = 0;
6611                         } else {
6612                                 NFSLOCKNODE(np);
6613                                 np->n_flag &= ~NDSCOMMIT;
6614                                 NFSUNLOCKNODE(np);
6615                         }
6616                 } else if (rwflag == NFSV4OPEN_ACCESSREAD)
6617                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6618                             io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p);
6619                 else {
6620                         error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
6621                             stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
6622                             0, NFS_VER4, minorvers, cred, p);
6623                         if (error == 0) {
6624                                 NFSLOCKCLSTATE();
6625                                 lyp->nfsly_flags |= NFSLY_WRITTEN;
6626                                 NFSUNLOCKCLSTATE();
6627                         }
6628                 }
6629                 if (error == 0) {
6630                         transfer = stripe_unit_size;
6631                         stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
6632                         len -= xfer;
6633                         off += xfer;
6634                 }
6635         }
6636         return (error);
6637 }
6638
6639 /*
6640  * Do I/O using an NFSv4.1 flex file layout.
6641  */
6642 static int
6643 nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6644     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6645     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6646     uint64_t len, int mirror, int docommit, struct mbuf *mp,
6647     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6648 {
6649         uint64_t xfer;
6650         int error;
6651         struct nfsnode *np;
6652         struct nfsfh *fhp;
6653         struct nfsclds **dspp;
6654         struct ucred *tcred;
6655         struct mbuf *m, *m2;
6656         uint32_t copylen;
6657
6658         np = VTONFS(vp);
6659         error = 0;
6660         NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
6661             (uintmax_t)len);
6662         /* Loop around, doing I/O for each stripe unit. */
6663         while (len > 0 && error == 0) {
6664                 dspp = nfsfldi_addr(dp, 0);
6665                 fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
6666                 stateidp = &flp->nfsfl_ffm[mirror].st;
6667                 NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
6668                     mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
6669                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
6670                         tcred = NFSNEWCRED(cred);
6671                         tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
6672                         tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
6673                         tcred->cr_ngroups = 1;
6674                 } else
6675                         tcred = cred;
6676                 if (rwflag == NFSV4OPEN_ACCESSREAD)
6677                         copylen = dp->nfsdi_rsize;
6678                 else {
6679                         copylen = dp->nfsdi_wsize;
6680                         if (len > copylen && mp != NULL) {
6681                                 /*
6682                                  * When a mirrored configuration needs to do
6683                                  * multiple writes to each mirror, all writes
6684                                  * except the last one must be a multiple of
6685                                  * 4 bytes.  This is required so that the XDR
6686                                  * does not need padding.
6687                                  * If possible, clip the size to an exact
6688                                  * multiple of the mbuf length, so that the
6689                                  * split will be on an mbuf boundary.
6690                                  */
6691                                 copylen &= 0xfffffffc;
6692                                 if (copylen > mp->m_len)
6693                                         copylen = copylen / mp->m_len *
6694                                             mp->m_len;
6695                         }
6696                 }
6697                 NFSLOCKNODE(np);
6698                 np->n_flag |= NDSCOMMIT;
6699                 NFSUNLOCKNODE(np);
6700                 if (len > copylen && docommit == 0)
6701                         xfer = copylen;
6702                 else
6703                         xfer = len;
6704                 if (docommit != 0) {
6705                         if (error == 0) {
6706                                 /*
6707                                  * Do last mirrored DS commit with this thread.
6708                                  */
6709                                 if (mirror < flp->nfsfl_mirrorcnt - 1)
6710                                         error = nfsio_commitds(vp, off, xfer,
6711                                             *dspp, fhp, dp->nfsdi_vers,
6712                                             dp->nfsdi_minorvers, drpc, tcred,
6713                                             p);
6714                                 else
6715                                         error = nfsrpc_commitds(vp, off, xfer,
6716                                             *dspp, fhp, dp->nfsdi_vers,
6717                                             dp->nfsdi_minorvers, tcred, p);
6718                                 NFSCL_DEBUG(4, "commitds=%d\n", error);
6719                                 if (error != 0 && error != EACCES && error !=
6720                                     ESTALE) {
6721                                         NFSCL_DEBUG(4,
6722                                             "DS layreterr for commit\n");
6723                                         nfscl_dserr(NFSV4OP_COMMIT, error, dp,
6724                                             lyp, *dspp);
6725                                 }
6726                         }
6727                         NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
6728                         if (error == 0) {
6729                                 /*
6730                                  * Set both eof and uio_resid = 0 to end any
6731                                  * loops.
6732                                  */
6733                                 *eofp = 1;
6734                                 uiop->uio_resid = 0;
6735                         } else {
6736                                 NFSLOCKNODE(np);
6737                                 np->n_flag &= ~NDSCOMMIT;
6738                                 NFSUNLOCKNODE(np);
6739                         }
6740                 } else if (rwflag == NFSV4OPEN_ACCESSREAD) {
6741                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6742                             off, xfer, fhp, 1, dp->nfsdi_vers,
6743                             dp->nfsdi_minorvers, tcred, p);
6744                         NFSCL_DEBUG(4, "readds=%d\n", error);
6745                         if (error != 0 && error != EACCES && error != ESTALE) {
6746                                 NFSCL_DEBUG(4, "DS layreterr for read\n");
6747                                 nfscl_dserr(NFSV4OP_READ, error, dp, lyp,
6748                                     *dspp);
6749                         }
6750                 } else {
6751                         if (flp->nfsfl_mirrorcnt == 1) {
6752                                 error = nfsrpc_writeds(vp, uiop, iomode,
6753                                     must_commit, stateidp, *dspp, off, xfer,
6754                                     fhp, 0, 1, dp->nfsdi_vers,
6755                                     dp->nfsdi_minorvers, tcred, p);
6756                                 if (error == 0) {
6757                                         NFSLOCKCLSTATE();
6758                                         lyp->nfsly_flags |= NFSLY_WRITTEN;
6759                                         NFSUNLOCKCLSTATE();
6760                                 }
6761                         } else {
6762                                 m = mp;
6763                                 if (xfer < len) {
6764                                         /* The mbuf list must be split. */
6765                                         m2 = nfsm_split(mp, xfer);
6766                                         if (m2 != NULL)
6767                                                 mp = m2;
6768                                         else {
6769                                                 m_freem(mp);
6770                                                 error = EIO;
6771                                         }
6772                                 }
6773                                 NFSCL_DEBUG(4, "mcopy len=%jd xfer=%jd\n",
6774                                     (uintmax_t)len, (uintmax_t)xfer);
6775                                 /*
6776                                  * Do last write to a mirrored DS with this
6777                                  * thread.
6778                                  */
6779                                 if (error == 0) {
6780                                         if (mirror < flp->nfsfl_mirrorcnt - 1)
6781                                                 error = nfsio_writedsmir(vp,
6782                                                     iomode, must_commit,
6783                                                     stateidp, *dspp, off,
6784                                                     xfer, fhp, m,
6785                                                     dp->nfsdi_vers,
6786                                                     dp->nfsdi_minorvers, drpc,
6787                                                     tcred, p);
6788                                         else
6789                                                 error = nfsrpc_writedsmir(vp,
6790                                                     iomode, must_commit,
6791                                                     stateidp, *dspp, off,
6792                                                     xfer, fhp, m,
6793                                                     dp->nfsdi_vers,
6794                                                     dp->nfsdi_minorvers, tcred,
6795                                                     p);
6796                                 }
6797                                 NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
6798                                 if (error != 0 && error != EACCES && error !=
6799                                     ESTALE) {
6800                                         NFSCL_DEBUG(4,
6801                                             "DS layreterr for write\n");
6802                                         nfscl_dserr(NFSV4OP_WRITE, error, dp,
6803                                             lyp, *dspp);
6804                                 }
6805                         }
6806                 }
6807                 NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
6808                 if (error == 0) {
6809                         len -= xfer;
6810                         off += xfer;
6811                 }
6812                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
6813                         NFSFREECRED(tcred);
6814         }
6815         NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
6816         return (error);
6817 }
6818
6819 /*
6820  * The actual read RPC done to a DS.
6821  */
6822 static int
6823 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6824     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6825     int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6826 {
6827         uint32_t *tl;
6828         int attrflag, error, retlen;
6829         struct nfsrv_descript nfsd;
6830         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6831         struct nfsrv_descript *nd = &nfsd;
6832         struct nfssockreq *nrp;
6833         struct nfsvattr na;
6834
6835         nd->nd_mrep = NULL;
6836         if (vers == 0 || vers == NFS_VER4) {
6837                 nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6838                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6839                     NULL);
6840                 vers = NFS_VER4;
6841                 NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6842                 if (flex != 0)
6843                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6844                 else
6845                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6846         } else {
6847                 nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6848                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6849                     NULL);
6850                 NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READ]);
6851                 NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READDS]);
6852                 NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6853         }
6854         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6855         txdr_hyper(io_off, tl);
6856         *(tl + 2) = txdr_unsigned(len);
6857         nrp = dsp->nfsclds_sockp;
6858         NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6859         if (nrp == NULL)
6860                 /* If NULL, use the MDS socket. */
6861                 nrp = &nmp->nm_sockreq;
6862         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6863             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6864         NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6865             error);
6866         if (error != 0)
6867                 return (error);
6868         if (vers == NFS_VER3) {
6869                 error = nfscl_postop_attr(nd, &na, &attrflag);
6870                 NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6871                 if (error != 0)
6872                         goto nfsmout;
6873         }
6874         if (nd->nd_repstat != 0) {
6875                 error = nd->nd_repstat;
6876                 goto nfsmout;
6877         }
6878         if (vers == NFS_VER3) {
6879                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6880                 *eofp = fxdr_unsigned(int, *(tl + 1));
6881         } else {
6882                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6883                 *eofp = fxdr_unsigned(int, *tl);
6884         }
6885         NFSM_STRSIZ(retlen, len);
6886         NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6887         error = nfsm_mbufuio(nd, uiop, retlen);
6888 nfsmout:
6889         if (nd->nd_mrep != NULL)
6890                 m_freem(nd->nd_mrep);
6891         return (error);
6892 }
6893
6894 /*
6895  * The actual write RPC done to a DS.
6896  */
6897 static int
6898 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6899     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6900     struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
6901     struct ucred *cred, NFSPROC_T *p)
6902 {
6903         uint32_t *tl;
6904         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6905         int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
6906         int32_t backup;
6907         struct nfsrv_descript nfsd;
6908         struct nfsrv_descript *nd = &nfsd;
6909         struct nfssockreq *nrp;
6910         struct nfsvattr na;
6911
6912         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
6913         nd->nd_mrep = NULL;
6914         if (vers == 0 || vers == NFS_VER4) {
6915                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6916                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6917                     NULL);
6918                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
6919                 vers = NFS_VER4;
6920                 if (flex != 0)
6921                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6922                 else
6923                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6924                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6925         } else {
6926                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6927                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6928                     NULL);
6929                 NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
6930                 NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
6931                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
6932                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6933         }
6934         txdr_hyper(io_off, tl);
6935         tl += 2;
6936         if (vers == NFS_VER3)
6937                 *tl++ = txdr_unsigned(len);
6938         *tl++ = txdr_unsigned(*iomode);
6939         *tl = txdr_unsigned(len);
6940         error = nfsm_uiombuf(nd, uiop, len);
6941         if (error != 0) {
6942                 m_freem(nd->nd_mreq);
6943                 return (error);
6944         }
6945         nrp = dsp->nfsclds_sockp;
6946         if (nrp == NULL)
6947                 /* If NULL, use the MDS socket. */
6948                 nrp = &nmp->nm_sockreq;
6949         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6950             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6951         NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
6952             nd->nd_repstat);
6953         if (error != 0)
6954                 return (error);
6955         if (nd->nd_repstat != 0) {
6956                 /*
6957                  * In case the rpc gets retried, roll
6958                  * the uio fields changed by nfsm_uiombuf()
6959                  * back.
6960                  */
6961                 uiop->uio_offset -= len;
6962                 uiop->uio_resid += len;
6963                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - len;
6964                 uiop->uio_iov->iov_len += len;
6965                 error = nd->nd_repstat;
6966         } else {
6967                 if (vers == NFS_VER3) {
6968                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6969                             NULL);
6970                         NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
6971                         if (error != 0)
6972                                 goto nfsmout;
6973                 }
6974                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6975                 rlen = fxdr_unsigned(int, *tl++);
6976                 NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
6977                 if (rlen == 0) {
6978                         error = NFSERR_IO;
6979                         goto nfsmout;
6980                 } else if (rlen < len) {
6981                         backup = len - rlen;
6982                         uiop->uio_iov->iov_base =
6983                             (char *)uiop->uio_iov->iov_base - backup;
6984                         uiop->uio_iov->iov_len += backup;
6985                         uiop->uio_offset -= backup;
6986                         uiop->uio_resid += backup;
6987                         len = rlen;
6988                 }
6989                 commit = fxdr_unsigned(int, *tl++);
6990
6991                 /*
6992                  * Return the lowest commitment level
6993                  * obtained by any of the RPCs.
6994                  */
6995                 if (committed == NFSWRITE_FILESYNC)
6996                         committed = commit;
6997                 else if (committed == NFSWRITE_DATASYNC &&
6998                     commit == NFSWRITE_UNSTABLE)
6999                         committed = commit;
7000                 if (commit_thru_mds != 0) {
7001                         NFSLOCKMNT(nmp);
7002                         if (!NFSHASWRITEVERF(nmp)) {
7003                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
7004                                 NFSSETWRITEVERF(nmp);
7005                         } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF) &&
7006                             *must_commit != 2) {
7007                                 *must_commit = 1;
7008                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
7009                         }
7010                         NFSUNLOCKMNT(nmp);
7011                 } else {
7012                         NFSLOCKDS(dsp);
7013                         if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
7014                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7015                                 dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
7016                         } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
7017                             *must_commit != 2) {
7018                                 *must_commit = 1;
7019                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7020                         }
7021                         NFSUNLOCKDS(dsp);
7022                 }
7023         }
7024 nfsmout:
7025         if (nd->nd_mrep != NULL)
7026                 m_freem(nd->nd_mrep);
7027         *iomode = committed;
7028         if (nd->nd_repstat != 0 && error == 0)
7029                 error = nd->nd_repstat;
7030         return (error);
7031 }
7032
7033 /*
7034  * The actual write RPC done to a DS.
7035  * This variant is called from a separate kernel process for mirrors.
7036  * Any short write is considered an IO error.
7037  */
7038 static int
7039 nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
7040     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
7041     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
7042     struct ucred *cred, NFSPROC_T *p)
7043 {
7044         uint32_t *tl;
7045         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7046         int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
7047         struct nfsrv_descript nfsd;
7048         struct nfsrv_descript *nd = &nfsd;
7049         struct nfssockreq *nrp;
7050         struct nfsvattr na;
7051
7052         nd->nd_mrep = NULL;
7053         if (vers == 0 || vers == NFS_VER4) {
7054                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
7055                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7056                     NULL);
7057                 vers = NFS_VER4;
7058                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
7059                     minorvers);
7060                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7061                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
7062         } else {
7063                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
7064                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7065                     NULL);
7066                 NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
7067                 NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
7068                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
7069                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
7070         }
7071         txdr_hyper(io_off, tl);
7072         tl += 2;
7073         if (vers == NFS_VER3)
7074                 *tl++ = txdr_unsigned(len);
7075         *tl++ = txdr_unsigned(*iomode);
7076         *tl = txdr_unsigned(len);
7077         if (len > 0) {
7078                 /* Put data in mbuf chain. */
7079                 nd->nd_mb->m_next = m;
7080         }
7081         nrp = dsp->nfsclds_sockp;
7082         if (nrp == NULL)
7083                 /* If NULL, use the MDS socket. */
7084                 nrp = &nmp->nm_sockreq;
7085         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7086             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7087         NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
7088             nd->nd_repstat);
7089         if (error != 0)
7090                 return (error);
7091         if (nd->nd_repstat != 0)
7092                 error = nd->nd_repstat;
7093         else {
7094                 if (vers == NFS_VER3) {
7095                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
7096                             NULL);
7097                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
7098                             error);
7099                         if (error != 0)
7100                                 goto nfsmout;
7101                 }
7102                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
7103                 rlen = fxdr_unsigned(int, *tl++);
7104                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
7105                     rlen);
7106                 if (rlen != len) {
7107                         error = NFSERR_IO;
7108                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
7109                             len, rlen);
7110                         goto nfsmout;
7111                 }
7112                 commit = fxdr_unsigned(int, *tl++);
7113
7114                 /*
7115                  * Return the lowest commitment level
7116                  * obtained by any of the RPCs.
7117                  */
7118                 if (committed == NFSWRITE_FILESYNC)
7119                         committed = commit;
7120                 else if (committed == NFSWRITE_DATASYNC &&
7121                     commit == NFSWRITE_UNSTABLE)
7122                         committed = commit;
7123                 NFSLOCKDS(dsp);
7124                 if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
7125                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7126                         dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
7127                 } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
7128                     *must_commit != 2) {
7129                         *must_commit = 1;
7130                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7131                 }
7132                 NFSUNLOCKDS(dsp);
7133         }
7134 nfsmout:
7135         if (nd->nd_mrep != NULL)
7136                 m_freem(nd->nd_mrep);
7137         *iomode = committed;
7138         if (nd->nd_repstat != 0 && error == 0)
7139                 error = nd->nd_repstat;
7140         return (error);
7141 }
7142
7143 /*
7144  * Start up the thread that will execute nfsrpc_writedsmir().
7145  */
7146 static void
7147 start_writedsmir(void *arg, int pending)
7148 {
7149         struct nfsclwritedsdorpc *drpc;
7150
7151         drpc = (struct nfsclwritedsdorpc *)arg;
7152         drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
7153             &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
7154             drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
7155             drpc->p);
7156         drpc->done = 1;
7157         crfree(drpc->cred);
7158         NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
7159 }
7160
7161 /*
7162  * Set up the write DS mirror call for the pNFS I/O thread.
7163  */
7164 static int
7165 nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
7166     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
7167     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
7168     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7169 {
7170         int error, ret;
7171
7172         error = 0;
7173         drpc->done = 0;
7174         drpc->vp = vp;
7175         drpc->iomode = *iomode;
7176         drpc->must_commit = *must_commit;
7177         drpc->stateidp = stateidp;
7178         drpc->dsp = dsp;
7179         drpc->off = off;
7180         drpc->len = len;
7181         drpc->fhp = fhp;
7182         drpc->m = m;
7183         drpc->vers = vers;
7184         drpc->minorvers = minorvers;
7185         drpc->cred = crhold(cred);
7186         drpc->p = p;
7187         drpc->inprog = 0;
7188         ret = EIO;
7189         if (nfs_pnfsiothreads != 0) {
7190                 ret = nfs_pnfsio(start_writedsmir, drpc);
7191                 NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
7192         }
7193         if (ret != 0) {
7194                 error = nfsrpc_writedsmir(vp, iomode, &drpc->must_commit,
7195                     stateidp, dsp, off, len, fhp, m, vers, minorvers, cred, p);
7196                 crfree(drpc->cred);
7197         }
7198         NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
7199         return (error);
7200 }
7201
7202 /*
7203  * Free up the nfsclds structure.
7204  */
7205 void
7206 nfscl_freenfsclds(struct nfsclds *dsp)
7207 {
7208         int i;
7209
7210         if (dsp == NULL)
7211                 return;
7212         if (dsp->nfsclds_sockp != NULL) {
7213                 NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
7214                 NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
7215                 free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
7216                 free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
7217         }
7218         NFSFREEMUTEX(&dsp->nfsclds_mtx);
7219         NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
7220         for (i = 0; i < NFSV4_CBSLOTS; i++) {
7221                 if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
7222                         m_freem(
7223                             dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
7224         }
7225         free(dsp, M_NFSCLDS);
7226 }
7227
7228 static enum nfsclds_state
7229 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
7230     struct nfsclds **retdspp, uint32_t *sequencep)
7231 {
7232         struct nfsclds *dsp;
7233         int fndseq;
7234
7235         /*
7236          * Search the list of nfsclds structures for one with the same
7237          * server.
7238          */
7239         fndseq = 0;
7240         TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
7241                 if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
7242                     dsp->nfsclds_servownlen != 0 &&
7243                     !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
7244                     dsp->nfsclds_servownlen) &&
7245                     dsp->nfsclds_sess.nfsess_defunct == 0) {
7246                         NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
7247                             TAILQ_FIRST(&nmp->nm_sess), dsp,
7248                             dsp->nfsclds_flags);
7249                         if (fndseq == 0) {
7250                                 /* Get sequenceid# from first entry. */
7251                                 *sequencep =
7252                                     dsp->nfsclds_sess.nfsess_sequenceid;
7253                                 fndseq = 1;
7254                         }
7255                         /* Server major id matches. */
7256                         if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
7257                                 *retdspp = dsp;
7258                                 return (NFSDSP_USETHISSESSION);
7259                         }
7260                 }
7261         }
7262         if (fndseq != 0)
7263                 return (NFSDSP_SEQTHISSESSION);
7264         return (NFSDSP_NOTFOUND);
7265 }
7266
7267 /*
7268  * NFS commit rpc to a NFSv4.1 DS.
7269  */
7270 static int
7271 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
7272     struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
7273     NFSPROC_T *p)
7274 {
7275         uint32_t *tl;
7276         struct nfsrv_descript nfsd, *nd = &nfsd;
7277         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7278         struct nfssockreq *nrp;
7279         struct nfsvattr na;
7280         int attrflag, error;
7281
7282         nd->nd_mrep = NULL;
7283         if (vers == 0 || vers == NFS_VER4) {
7284                 nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
7285                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7286                     NULL);
7287                 vers = NFS_VER4;
7288         } else {
7289                 nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
7290                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7291                     NULL);
7292                 NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMIT]);
7293                 NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMITDS]);
7294         }
7295         NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
7296             minorvers);
7297         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7298         txdr_hyper(offset, tl);
7299         tl += 2;
7300         *tl = txdr_unsigned(cnt);
7301         nrp = dsp->nfsclds_sockp;
7302         if (nrp == NULL)
7303                 /* If NULL, use the MDS socket. */
7304                 nrp = &nmp->nm_sockreq;
7305         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7306             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7307         NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
7308             nd->nd_repstat);
7309         if (error != 0)
7310                 return (error);
7311         if (nd->nd_repstat == 0) {
7312                 if (vers == NFS_VER3) {
7313                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
7314                             NULL);
7315                         NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
7316                         if (error != 0)
7317                                 goto nfsmout;
7318                 }
7319                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
7320                 NFSLOCKDS(dsp);
7321                 if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
7322                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7323                         error = NFSERR_STALEWRITEVERF;
7324                 }
7325                 NFSUNLOCKDS(dsp);
7326         }
7327 nfsmout:
7328         if (error == 0 && nd->nd_repstat != 0)
7329                 error = nd->nd_repstat;
7330         m_freem(nd->nd_mrep);
7331         return (error);
7332 }
7333
7334 /*
7335  * Start up the thread that will execute nfsrpc_commitds().
7336  */
7337 static void
7338 start_commitds(void *arg, int pending)
7339 {
7340         struct nfsclwritedsdorpc *drpc;
7341
7342         drpc = (struct nfsclwritedsdorpc *)arg;
7343         drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
7344             drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
7345             drpc->p);
7346         drpc->done = 1;
7347         crfree(drpc->cred);
7348         NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
7349 }
7350
7351 /*
7352  * Set up the commit DS mirror call for the pNFS I/O thread.
7353  */
7354 static int
7355 nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
7356     struct nfsfh *fhp, int vers, int minorvers,
7357     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7358 {
7359         int error, ret;
7360
7361         error = 0;
7362         drpc->done = 0;
7363         drpc->vp = vp;
7364         drpc->off = offset;
7365         drpc->len = cnt;
7366         drpc->dsp = dsp;
7367         drpc->fhp = fhp;
7368         drpc->vers = vers;
7369         drpc->minorvers = minorvers;
7370         drpc->cred = crhold(cred);
7371         drpc->p = p;
7372         drpc->inprog = 0;
7373         ret = EIO;
7374         if (nfs_pnfsiothreads != 0) {
7375                 ret = nfs_pnfsio(start_commitds, drpc);
7376                 NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
7377         }
7378         if (ret != 0) {
7379                 error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
7380                     minorvers, cred, p);
7381                 crfree(drpc->cred);
7382         }
7383         NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
7384         return (error);
7385 }
7386
7387 /*
7388  * NFS Advise rpc
7389  */
7390 int
7391 nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int advise,
7392     struct ucred *cred, NFSPROC_T *p)
7393 {
7394         u_int32_t *tl;
7395         struct nfsrv_descript nfsd, *nd = &nfsd;
7396         nfsattrbit_t hints;
7397         int error;
7398
7399         NFSZERO_ATTRBIT(&hints);
7400         if (advise == POSIX_FADV_WILLNEED)
7401                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7402         else if (advise == POSIX_FADV_DONTNEED)
7403                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7404         else
7405                 return (0);
7406         NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp, cred);
7407         nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7408         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
7409         txdr_hyper(offset, tl);
7410         tl += 2;
7411         txdr_hyper(cnt, tl);
7412         nfsrv_putattrbit(nd, &hints);
7413         error = nfscl_request(nd, vp, p, cred);
7414         if (error != 0)
7415                 return (error);
7416         if (nd->nd_repstat != 0)
7417                 error = nd->nd_repstat;
7418         m_freem(nd->nd_mrep);
7419         return (error);
7420 }
7421
7422 #ifdef notyet
7423 /*
7424  * NFS advise rpc to a NFSv4.2 DS.
7425  */
7426 static int
7427 nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7428     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7429     struct ucred *cred, NFSPROC_T *p)
7430 {
7431         uint32_t *tl;
7432         struct nfsrv_descript nfsd, *nd = &nfsd;
7433         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7434         struct nfssockreq *nrp;
7435         nfsattrbit_t hints;
7436         int error;
7437
7438         /* For NFS DSs prior to NFSv4.2, just return OK. */
7439         if (vers == NFS_VER3 || minorversion < NFSV42_MINORVERSION)
7440                 return (0);
7441         NFSZERO_ATTRBIT(&hints);
7442         if (advise == POSIX_FADV_WILLNEED)
7443                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7444         else if (advise == POSIX_FADV_DONTNEED)
7445                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7446         else
7447                 return (0);
7448         nd->nd_mrep = NULL;
7449         nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh,
7450             fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers, NULL);
7451         vers = NFS_VER4;
7452         NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers,
7453             minorvers);
7454         nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7455         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7456         txdr_hyper(offset, tl);
7457         tl += 2;
7458         *tl = txdr_unsigned(cnt);
7459         nfsrv_putattrbit(nd, &hints);
7460         nrp = dsp->nfsclds_sockp;
7461         if (nrp == NULL)
7462                 /* If NULL, use the MDS socket. */
7463                 nrp = &nmp->nm_sockreq;
7464         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7465             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7466         NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error,
7467             nd->nd_repstat);
7468         if (error != 0)
7469                 return (error);
7470         if (nd->nd_repstat != 0)
7471                 error = nd->nd_repstat;
7472         m_freem(nd->nd_mrep);
7473         return (error);
7474 }
7475
7476 /*
7477  * Start up the thread that will execute nfsrpc_commitds().
7478  */
7479 static void
7480 start_adviseds(void *arg, int pending)
7481 {
7482         struct nfsclwritedsdorpc *drpc;
7483
7484         drpc = (struct nfsclwritedsdorpc *)arg;
7485         drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len,
7486             drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers,
7487             drpc->cred, drpc->p);
7488         drpc->done = 1;
7489         crfree(drpc->cred);
7490         NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err);
7491 }
7492
7493 /*
7494  * Set up the advise DS mirror call for the pNFS I/O thread.
7495  */
7496 static int
7497 nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7498     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7499     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7500 {
7501         int error, ret;
7502
7503         error = 0;
7504         drpc->done = 0;
7505         drpc->vp = vp;
7506         drpc->off = offset;
7507         drpc->len = cnt;
7508         drpc->advise = advise;
7509         drpc->dsp = dsp;
7510         drpc->fhp = fhp;
7511         drpc->vers = vers;
7512         drpc->minorvers = minorvers;
7513         drpc->cred = crhold(cred);
7514         drpc->p = p;
7515         drpc->inprog = 0;
7516         ret = EIO;
7517         if (nfs_pnfsiothreads != 0) {
7518                 ret = nfs_pnfsio(start_adviseds, drpc);
7519                 NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret);
7520         }
7521         if (ret != 0) {
7522                 error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers,
7523                     minorvers, cred, p);
7524                 crfree(drpc->cred);
7525         }
7526         NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error);
7527         return (error);
7528 }
7529 #endif  /* notyet */
7530
7531 /*
7532  * Do the Allocate operation, retrying for recovery.
7533  */
7534 int
7535 nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap,
7536     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
7537 {
7538         int error, expireret = 0, retrycnt, nostateid;
7539         uint32_t clidrev = 0;
7540         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7541         struct nfsfh *nfhp = NULL;
7542         nfsv4stateid_t stateid;
7543         off_t tmp_off;
7544         void *lckp;
7545
7546         if (len < 0)
7547                 return (EINVAL);
7548         if (len == 0)
7549                 return (0);
7550         tmp_off = off + len;
7551         NFSLOCKMNT(nmp);
7552         if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) {
7553                 NFSUNLOCKMNT(nmp);
7554                 return (EFBIG);
7555         }
7556         if (nmp->nm_clp != NULL)
7557                 clidrev = nmp->nm_clp->nfsc_clientidrev;
7558         NFSUNLOCKMNT(nmp);
7559         nfhp = VTONFS(vp)->n_fhp;
7560         retrycnt = 0;
7561         do {
7562                 lckp = NULL;
7563                 nostateid = 0;
7564                 nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
7565                     NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
7566                 if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
7567                     stateid.other[2] == 0) {
7568                         nostateid = 1;
7569                         NFSCL_DEBUG(1, "stateid0 in allocate\n");
7570                 }
7571
7572                 /*
7573                  * Not finding a stateid should probably never happen,
7574                  * but just return an error for this case.
7575                  */
7576                 if (nostateid != 0)
7577                         error = EIO;
7578                 else
7579                         error = nfsrpc_allocaterpc(vp, off, len, &stateid,
7580                             nap, attrflagp, cred, p);
7581                 if (error == NFSERR_STALESTATEID)
7582                         nfscl_initiate_recovery(nmp->nm_clp);
7583                 if (lckp != NULL)
7584                         nfscl_lockderef(lckp);
7585                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
7586                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
7587                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
7588                         (void) nfs_catnap(PZERO, error, "nfs_allocate");
7589                 } else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
7590                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
7591                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
7592                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
7593                         error = EIO;
7594                 }
7595                 retrycnt++;
7596         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
7597             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
7598             error == NFSERR_STALEDONTRECOVER ||
7599             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
7600             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
7601              expireret == 0 && clidrev != 0 && retrycnt < 4));
7602         if (error != 0 && retrycnt >= 4)
7603                 error = EIO;
7604         return (error);
7605 }
7606
7607 /*
7608  * The allocate RPC.
7609  */
7610 static int
7611 nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp,
7612     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
7613 {
7614         uint32_t *tl;
7615         int error;
7616         struct nfsrv_descript nfsd;
7617         struct nfsrv_descript *nd = &nfsd;
7618         nfsattrbit_t attrbits;
7619
7620         *attrflagp = 0;
7621         NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp, cred);
7622         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7623         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
7624         txdr_hyper(off, tl); tl += 2;
7625         txdr_hyper(len, tl); tl += 2;
7626         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7627         NFSGETATTR_ATTRBIT(&attrbits);
7628         nfsrv_putattrbit(nd, &attrbits);
7629         error = nfscl_request(nd, vp, p, cred);
7630         if (error != 0)
7631                 return (error);
7632         if (nd->nd_repstat == 0) {
7633                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7634                 error = nfsm_loadattr(nd, nap);
7635                 if (error == 0)
7636                         *attrflagp = NFS_LATTR_NOSHRINK;
7637         } else
7638                 error = nd->nd_repstat;
7639 nfsmout:
7640         m_freem(nd->nd_mrep);
7641         return (error);
7642 }
7643
7644 /*
7645  * Set up the XDR arguments for the LayoutGet operation.
7646  */
7647 static void
7648 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
7649     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
7650     int layoutlen, int usecurstateid)
7651 {
7652         uint32_t *tl;
7653
7654         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
7655             NFSX_STATEID);
7656         *tl++ = newnfs_false;           /* Don't signal availability. */
7657         *tl++ = txdr_unsigned(layouttype);
7658         *tl++ = txdr_unsigned(iomode);
7659         txdr_hyper(offset, tl);
7660         tl += 2;
7661         txdr_hyper(len, tl);
7662         tl += 2;
7663         txdr_hyper(minlen, tl);
7664         tl += 2;
7665         if (usecurstateid != 0) {
7666                 /* Special stateid for Current stateid. */
7667                 *tl++ = txdr_unsigned(1);
7668                 *tl++ = 0;
7669                 *tl++ = 0;
7670                 *tl++ = 0;
7671         } else {
7672                 *tl++ = txdr_unsigned(stateidp->seqid);
7673                 NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
7674                 *tl++ = stateidp->other[0];
7675                 *tl++ = stateidp->other[1];
7676                 *tl++ = stateidp->other[2];
7677         }
7678         *tl = txdr_unsigned(layoutlen);
7679 }
7680
7681 /*
7682  * Parse the reply for a successful LayoutGet operation.
7683  */
7684 static int
7685 nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd,
7686     nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp)
7687 {
7688         uint32_t *tl;
7689         struct nfsclflayout *flp, *prevflp, *tflp;
7690         int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
7691         int m, mirrorcnt;
7692         uint64_t retlen, off;
7693         struct nfsfh *nfhp;
7694         uint8_t *cp;
7695         uid_t user;
7696         gid_t grp;
7697
7698         NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
7699         error = 0;
7700         flp = NULL;
7701         gotiomode = -1;
7702         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
7703         if (*tl++ != 0)
7704                 *retonclosep = 1;
7705         else
7706                 *retonclosep = 0;
7707         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
7708         NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
7709             (int)stateidp->seqid);
7710         stateidp->other[0] = *tl++;
7711         stateidp->other[1] = *tl++;
7712         stateidp->other[2] = *tl++;
7713         cnt = fxdr_unsigned(int, *tl);
7714         NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
7715         if (cnt <= 0 || cnt > 10000) {
7716                 /* Don't accept more than 10000 layouts in reply. */
7717                 error = NFSERR_BADXDR;
7718                 goto nfsmout;
7719         }
7720         for (i = 0; i < cnt; i++) {
7721                 /* Dissect to the layout type. */
7722                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
7723                     3 * NFSX_UNSIGNED);
7724                 off = fxdr_hyper(tl); tl += 2;
7725                 retlen = fxdr_hyper(tl); tl += 2;
7726                 iomode = fxdr_unsigned(int, *tl++);
7727                 laytype = fxdr_unsigned(int, *tl);
7728                 NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
7729                     (uintmax_t)off, (uintmax_t)retlen, iomode);
7730                 /* Ignore length of layout body for now. */
7731                 if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
7732                         /* Parse the File layout up to fhcnt. */
7733                         NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
7734                             NFSX_HYPER + NFSX_V4DEVICEID);
7735                         fhcnt = fxdr_unsigned(int, *(tl + 4 +
7736                             NFSX_V4DEVICEID / NFSX_UNSIGNED));
7737                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7738                         if (fhcnt < 0 || fhcnt > 100) {
7739                                 /* Don't accept more than 100 file handles. */
7740                                 error = NFSERR_BADXDR;
7741                                 goto nfsmout;
7742                         }
7743                         if (fhcnt > 0)
7744                                 flp = malloc(sizeof(*flp) + fhcnt *
7745                                     sizeof(struct nfsfh *), M_NFSFLAYOUT,
7746                                     M_WAITOK);
7747                         else
7748                                 flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
7749                                     M_WAITOK);
7750                         flp->nfsfl_flags = NFSFL_FILE;
7751                         flp->nfsfl_fhcnt = 0;
7752                         flp->nfsfl_devp = NULL;
7753                         flp->nfsfl_off = off;
7754                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7755                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7756                         else
7757                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
7758                         flp->nfsfl_iomode = iomode;
7759                         if (gotiomode == -1)
7760                                 gotiomode = flp->nfsfl_iomode;
7761                         /* Ignore layout body length for now. */
7762                         NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
7763                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7764                         flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
7765                         NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
7766                         mtx_lock(&nmp->nm_mtx);
7767                         if (nmp->nm_minorvers > 1 && (flp->nfsfl_util &
7768                             NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0)
7769                                 nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7770                         mtx_unlock(&nmp->nm_mtx);
7771                         flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
7772                         flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
7773                         NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
7774                             flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
7775                         for (j = 0; j < fhcnt; j++) {
7776                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7777                                 nfhlen = fxdr_unsigned(int, *tl);
7778                                 if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
7779                                         error = NFSERR_BADXDR;
7780                                         goto nfsmout;
7781                                 }
7782                                 nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
7783                                     M_NFSFH, M_WAITOK);
7784                                 flp->nfsfl_fh[j] = nfhp;
7785                                 flp->nfsfl_fhcnt++;
7786                                 nfhp->nfh_len = nfhlen;
7787                                 NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
7788                                 NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
7789                         }
7790                 } else if (laytype == NFSLAYOUT_FLEXFILE) {
7791                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
7792                             NFSX_HYPER);
7793                         mirrorcnt = fxdr_unsigned(int, *(tl + 2));
7794                         NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
7795                         if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
7796                                 error = NFSERR_BADXDR;
7797                                 goto nfsmout;
7798                         }
7799                         flp = malloc(sizeof(*flp) + mirrorcnt *
7800                             sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
7801                         flp->nfsfl_flags = NFSFL_FLEXFILE;
7802                         flp->nfsfl_mirrorcnt = mirrorcnt;
7803                         for (j = 0; j < mirrorcnt; j++)
7804                                 flp->nfsfl_ffm[j].devp = NULL;
7805                         flp->nfsfl_off = off;
7806                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7807                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7808                         else
7809                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
7810                         flp->nfsfl_iomode = iomode;
7811                         if (gotiomode == -1)
7812                                 gotiomode = flp->nfsfl_iomode;
7813                         flp->nfsfl_stripeunit = fxdr_hyper(tl);
7814                         NFSCL_DEBUG(4, "stripeunit=%ju\n",
7815                             (uintmax_t)flp->nfsfl_stripeunit);
7816                         for (j = 0; j < mirrorcnt; j++) {
7817                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7818                                 k = fxdr_unsigned(int, *tl);
7819                                 if (k < 1 || k > 128) {
7820                                         error = NFSERR_BADXDR;
7821                                         goto nfsmout;
7822                                 }
7823                                 NFSCL_DEBUG(4, "servercnt=%d\n", k);
7824                                 for (l = 0; l < k; l++) {
7825                                         NFSM_DISSECT(tl, uint32_t *,
7826                                             NFSX_V4DEVICEID + NFSX_STATEID +
7827                                             2 * NFSX_UNSIGNED);
7828                                         if (l == 0) {
7829                                                 /* Just use the first server. */
7830                                                 NFSBCOPY(tl,
7831                                                     flp->nfsfl_ffm[j].dev,
7832                                                     NFSX_V4DEVICEID);
7833                                                 tl += (NFSX_V4DEVICEID /
7834                                                     NFSX_UNSIGNED);
7835                                                 tl++;
7836                                                 flp->nfsfl_ffm[j].st.seqid =
7837                                                     *tl++;
7838                                                 flp->nfsfl_ffm[j].st.other[0] =
7839                                                     *tl++;
7840                                                 flp->nfsfl_ffm[j].st.other[1] =
7841                                                     *tl++;
7842                                                 flp->nfsfl_ffm[j].st.other[2] =
7843                                                     *tl++;
7844                                                 NFSCL_DEBUG(4, "st.seqid=%u "
7845                                                  "st.o0=0x%x st.o1=0x%x "
7846                                                  "st.o2=0x%x\n",
7847                                                  flp->nfsfl_ffm[j].st.seqid,
7848                                                  flp->nfsfl_ffm[j].st.other[0],
7849                                                  flp->nfsfl_ffm[j].st.other[1],
7850                                                  flp->nfsfl_ffm[j].st.other[2]);
7851                                         } else
7852                                                 tl += ((NFSX_V4DEVICEID +
7853                                                     NFSX_STATEID +
7854                                                     NFSX_UNSIGNED) /
7855                                                     NFSX_UNSIGNED);
7856                                         fhcnt = fxdr_unsigned(int, *tl);
7857                                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7858                                         if (fhcnt < 1 ||
7859                                             fhcnt > NFSDEV_MAXVERS) {
7860                                                 error = NFSERR_BADXDR;
7861                                                 goto nfsmout;
7862                                         }
7863                                         for (m = 0; m < fhcnt; m++) {
7864                                                 NFSM_DISSECT(tl, uint32_t *,
7865                                                     NFSX_UNSIGNED);
7866                                                 nfhlen = fxdr_unsigned(int,
7867                                                     *tl);
7868                                                 NFSCL_DEBUG(4, "nfhlen=%d\n",
7869                                                     nfhlen);
7870                                                 if (nfhlen <= 0 || nfhlen >
7871                                                     NFSX_V4FHMAX) {
7872                                                         error = NFSERR_BADXDR;
7873                                                         goto nfsmout;
7874                                                 }
7875                                                 NFSM_DISSECT(cp, uint8_t *,
7876                                                     NFSM_RNDUP(nfhlen));
7877                                                 if (l == 0) {
7878                                                         flp->nfsfl_ffm[j].fhcnt 
7879                                                             = fhcnt;
7880                                                         nfhp = malloc(
7881                                                             sizeof(*nfhp) +
7882                                                             nfhlen - 1, M_NFSFH,
7883                                                             M_WAITOK);
7884                                                         flp->nfsfl_ffm[j].fh[m]
7885                                                             = nfhp;
7886                                                         nfhp->nfh_len = nfhlen;
7887                                                         NFSBCOPY(cp,
7888                                                             nfhp->nfh_fh,
7889                                                             nfhlen);
7890                                                         NFSCL_DEBUG(4,
7891                                                             "got fh\n");
7892                                                 }
7893                                         }
7894                                         /* Now, get the ffsd_user/ffds_group. */
7895                                         error = nfsrv_parseug(nd, 0, &user,
7896                                             &grp, curthread);
7897                                         NFSCL_DEBUG(4, "after parseu=%d\n",
7898                                             error);
7899                                         if (error == 0)
7900                                                 error = nfsrv_parseug(nd, 1,
7901                                                     &user, &grp, curthread);
7902                                         NFSCL_DEBUG(4, "aft parseg=%d\n",
7903                                             grp);
7904                                         if (error != 0)
7905                                                 goto nfsmout;
7906                                         NFSCL_DEBUG(4, "user=%d group=%d\n",
7907                                             user, grp);
7908                                         if (l == 0) {
7909                                                 flp->nfsfl_ffm[j].user = user;
7910                                                 flp->nfsfl_ffm[j].group = grp;
7911                                                 NFSCL_DEBUG(4,
7912                                                     "usr=%d grp=%d\n", user,
7913                                                     grp);
7914                                         }
7915                                 }
7916                         }
7917                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7918                         flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
7919 #ifdef notnow
7920                         /*
7921                          * At this time, there is no flag.
7922                          * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be
7923                          * added, or it may never exist?
7924                          */
7925                         mtx_lock(&nmp->nm_mtx);
7926                         if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags &
7927                             NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0)
7928                                 nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7929                         mtx_unlock(&nmp->nm_mtx);
7930 #endif
7931                         flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
7932                         NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
7933                             flp->nfsfl_fflags, flp->nfsfl_statshint);
7934                 } else {
7935                         error = NFSERR_BADXDR;
7936                         goto nfsmout;
7937                 }
7938                 if (flp->nfsfl_iomode == gotiomode) {
7939                         /* Keep the list in increasing offset order. */
7940                         tflp = LIST_FIRST(flhp);
7941                         prevflp = NULL;
7942                         while (tflp != NULL &&
7943                             tflp->nfsfl_off < flp->nfsfl_off) {
7944                                 prevflp = tflp;
7945                                 tflp = LIST_NEXT(tflp, nfsfl_list);
7946                         }
7947                         if (prevflp == NULL)
7948                                 LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
7949                         else
7950                                 LIST_INSERT_AFTER(prevflp, flp,
7951                                     nfsfl_list);
7952                         NFSCL_DEBUG(4, "flp inserted\n");
7953                 } else {
7954                         printf("nfscl_layoutget(): got wrong iomode\n");
7955                         nfscl_freeflayout(flp);
7956                 }
7957                 flp = NULL;
7958         }
7959 nfsmout:
7960         NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
7961         if (error != 0 && flp != NULL)
7962                 nfscl_freeflayout(flp);
7963         return (error);
7964 }
7965
7966 /*
7967  * Parse a user/group digit string.
7968  */
7969 static int
7970 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
7971     NFSPROC_T *p)
7972 {
7973         uint32_t *tl;
7974         char *cp, *str, str0[NFSV4_SMALLSTR + 1];
7975         uint32_t len = 0;
7976         int error = 0;
7977
7978         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7979         len = fxdr_unsigned(uint32_t, *tl);
7980         str = NULL;
7981         if (len > NFSV4_OPAQUELIMIT) {
7982                 error = NFSERR_BADXDR;
7983                 goto nfsmout;
7984         }
7985         NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
7986         if (len == 0) {
7987                 if (dogrp != 0)
7988                         *gidp = GID_NOGROUP;
7989                 else
7990                         *uidp = UID_NOBODY;
7991                 return (0);
7992         }
7993         if (len > NFSV4_SMALLSTR)
7994                 str = malloc(len + 1, M_TEMP, M_WAITOK);
7995         else
7996                 str = str0;
7997         NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
7998         NFSBCOPY(cp, str, len);
7999         str[len] = '\0';
8000         NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
8001         if (dogrp != 0)
8002                 error = nfsv4_strtogid(nd, str, len, gidp);
8003         else
8004                 error = nfsv4_strtouid(nd, str, len, uidp);
8005 nfsmout:
8006         if (len > NFSV4_SMALLSTR)
8007                 free(str, M_TEMP);
8008         NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
8009         return (error);
8010 }
8011
8012 /*
8013  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
8014  * so that it does both an Open and a Layoutget.
8015  */
8016 static int
8017 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
8018     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
8019     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
8020     struct ucred *cred, NFSPROC_T *p)
8021 {
8022         struct nfscllayout *lyp;
8023         struct nfsclflayout *flp;
8024         struct nfsclflayouthead flh;
8025         int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
8026         int layouttype, laystat;
8027         nfsv4stateid_t stateid;
8028         struct nfsclsession *tsep;
8029
8030         error = 0;
8031         if (NFSHASFLEXFILE(nmp))
8032                 layouttype = NFSLAYOUT_FLEXFILE;
8033         else
8034                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
8035         /*
8036          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
8037          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
8038          * flp == NULL.
8039          */
8040         lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, mode, &flp,
8041             &recalled);
8042         NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
8043         if (lyp == NULL)
8044                 islocked = 0;
8045         else if (flp != NULL)
8046                 islocked = 1;
8047         else
8048                 islocked = 2;
8049         if ((lyp == NULL || flp == NULL) && recalled == 0) {
8050                 LIST_INIT(&flh);
8051                 tsep = nfsmnt_mdssession(nmp);
8052                 layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
8053                     3 * NFSX_UNSIGNED);
8054                 if (lyp == NULL)
8055                         usecurstateid = 1;
8056                 else {
8057                         usecurstateid = 0;
8058                         stateid.seqid = lyp->nfsly_stateid.seqid;
8059                         stateid.other[0] = lyp->nfsly_stateid.other[0];
8060                         stateid.other[1] = lyp->nfsly_stateid.other[1];
8061                         stateid.other[2] = lyp->nfsly_stateid.other[2];
8062                 }
8063                 error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
8064                     newfhp, newfhlen, mode, op, name, namelen,
8065                     dpp, &stateid, usecurstateid, layouttype, layoutlen,
8066                     &retonclose, &flh, &laystat, cred, p);
8067                 NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
8068                     laystat, error);
8069                 laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
8070                     &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
8071                     &islocked, cred, p);
8072         } else
8073                 error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
8074                     mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
8075         if (islocked == 2)
8076                 nfscl_rellayout(lyp, 1);
8077         else if (islocked == 1)
8078                 nfscl_rellayout(lyp, 0);
8079         return (error);
8080 }
8081
8082 /*
8083  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
8084  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
8085  * handled by nfsrpc_openrpc().
8086  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
8087  * can be NULL.
8088  */
8089 static int
8090 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
8091     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
8092     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
8093     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
8094     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
8095     int *laystatp, struct ucred *cred, NFSPROC_T *p)
8096 {
8097         uint32_t *tl;
8098         struct nfsrv_descript nfsd, *nd = &nfsd;
8099         struct nfscldeleg *ndp = NULL;
8100         struct nfsvattr nfsva;
8101         struct nfsclsession *tsep;
8102         uint32_t rflags, deleg;
8103         nfsattrbit_t attrbits;
8104         int error, ret, acesize, limitby, iomode;
8105
8106         *dpp = NULL;
8107         *laystatp = ENXIO;
8108         nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
8109             0, 0, cred);
8110         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
8111         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
8112         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
8113         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
8114         tsep = nfsmnt_mdssession(nmp);
8115         *tl++ = tsep->nfsess_clientid.lval[0];
8116         *tl = tsep->nfsess_clientid.lval[1];
8117         nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
8118         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8119         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
8120         if (NFSHASNFSV4N(nmp)) {
8121                 *tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
8122         } else {
8123                 *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
8124                 nfsm_strtom(nd, name, namelen);
8125         }
8126         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8127         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8128         NFSZERO_ATTRBIT(&attrbits);
8129         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
8130         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
8131         nfsrv_putattrbit(nd, &attrbits);
8132         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8133         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
8134         if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
8135                 iomode = NFSLAYOUTIOMODE_RW;
8136         else
8137                 iomode = NFSLAYOUTIOMODE_READ;
8138         nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
8139             layouttype, layoutlen, usecurstateid);
8140         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
8141             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
8142         if (error != 0)
8143                 return (error);
8144         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
8145         if (nd->nd_repstat != 0)
8146                 *laystatp = nd->nd_repstat;
8147         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8148                 /* ND_NOMOREDATA will be set if the Open operation failed. */
8149                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8150                     6 * NFSX_UNSIGNED);
8151                 op->nfso_stateid.seqid = *tl++;
8152                 op->nfso_stateid.other[0] = *tl++;
8153                 op->nfso_stateid.other[1] = *tl++;
8154                 op->nfso_stateid.other[2] = *tl;
8155                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
8156                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8157                 if (error != 0)
8158                         goto nfsmout;
8159                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
8160                 deleg = fxdr_unsigned(u_int32_t, *tl);
8161                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
8162                     deleg == NFSV4OPEN_DELEGATEWRITE) {
8163                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
8164                               NFSCLFLAGS_FIRSTDELEG))
8165                                 op->nfso_own->nfsow_clp->nfsc_flags |=
8166                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
8167                         ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
8168                             M_NFSCLDELEG, M_WAITOK);
8169                         LIST_INIT(&ndp->nfsdl_owner);
8170                         LIST_INIT(&ndp->nfsdl_lock);
8171                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
8172                         ndp->nfsdl_fhlen = newfhlen;
8173                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
8174                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
8175                         nfscl_lockinit(&ndp->nfsdl_rwlock);
8176                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8177                             NFSX_UNSIGNED);
8178                         ndp->nfsdl_stateid.seqid = *tl++;
8179                         ndp->nfsdl_stateid.other[0] = *tl++;
8180                         ndp->nfsdl_stateid.other[1] = *tl++;
8181                         ndp->nfsdl_stateid.other[2] = *tl++;
8182                         ret = fxdr_unsigned(int, *tl);
8183                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
8184                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
8185                                 /*
8186                                  * Indicates how much the file can grow.
8187                                  */
8188                                 NFSM_DISSECT(tl, u_int32_t *,
8189                                     3 * NFSX_UNSIGNED);
8190                                 limitby = fxdr_unsigned(int, *tl++);
8191                                 switch (limitby) {
8192                                 case NFSV4OPEN_LIMITSIZE:
8193                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
8194                                         break;
8195                                 case NFSV4OPEN_LIMITBLOCKS:
8196                                         ndp->nfsdl_sizelimit =
8197                                             fxdr_unsigned(u_int64_t, *tl++);
8198                                         ndp->nfsdl_sizelimit *=
8199                                             fxdr_unsigned(u_int64_t, *tl);
8200                                         break;
8201                                 default:
8202                                         error = NFSERR_BADXDR;
8203                                         goto nfsmout;
8204                                 };
8205                         } else
8206                                 ndp->nfsdl_flags = NFSCLDL_READ;
8207                         if (ret != 0)
8208                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
8209                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
8210                             &ret, &acesize, p);
8211                         if (error != 0)
8212                                 goto nfsmout;
8213                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
8214                         error = NFSERR_BADXDR;
8215                         goto nfsmout;
8216                 }
8217                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
8218                     nfscl_assumeposixlocks)
8219                         op->nfso_posixlock = 1;
8220                 else
8221                         op->nfso_posixlock = 0;
8222                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8223                 /* If the 2nd element == NFS_OK, the Getattr succeeded. */
8224                 if (*++tl == 0) {
8225                         error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
8226                             NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
8227                             NULL, NULL, NULL, p, cred);
8228                         if (error != 0)
8229                                 goto nfsmout;
8230                         if (ndp != NULL) {
8231                                 ndp->nfsdl_change = nfsva.na_filerev;
8232                                 ndp->nfsdl_modtime = nfsva.na_mtime;
8233                                 ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
8234                                 *dpp = ndp;
8235                                 ndp = NULL;
8236                         }
8237                         /*
8238                          * At this point, the Open has succeeded, so set
8239                          * nd_repstat = NFS_OK.  If the Layoutget failed,
8240                          * this function just won't return a layout.
8241                          */
8242                         if (nd->nd_repstat == 0) {
8243                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8244                                 *laystatp = fxdr_unsigned(int, *++tl);
8245                                 if (*laystatp == 0) {
8246                                         error = nfsrv_parselayoutget(nmp, nd,
8247                                             stateidp, retonclosep, flhp);
8248                                         if (error != 0)
8249                                                 *laystatp = error;
8250                                 }
8251                         } else
8252                                 nd->nd_repstat = 0;     /* Return 0 for Open. */
8253                 }
8254         }
8255         if (nd->nd_repstat != 0 && error == 0)
8256                 error = nd->nd_repstat;
8257 nfsmout:
8258         free(ndp, M_NFSCLDELEG);
8259         m_freem(nd->nd_mrep);
8260         return (error);
8261 }
8262
8263 /*
8264  * Similar nfsrpc_createv4(), but also does the LayoutGet operation.
8265  * Used only for mounts with pNFS enabled.
8266  */
8267 static int
8268 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
8269     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
8270     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
8271     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
8272     int *dattrflagp, int *unlockedp, nfsv4stateid_t *stateidp,
8273     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
8274     struct nfsclflayouthead *flhp, int *laystatp)
8275 {
8276         uint32_t *tl;
8277         int error = 0, deleg, newone, ret, acesize, limitby;
8278         struct nfsrv_descript nfsd, *nd = &nfsd;
8279         struct nfsclopen *op;
8280         struct nfscldeleg *dp = NULL;
8281         struct nfsnode *np;
8282         struct nfsfh *nfhp;
8283         struct nfsclsession *tsep;
8284         nfsattrbit_t attrbits;
8285         nfsv4stateid_t stateid;
8286         struct nfsmount *nmp;
8287
8288         nmp = VFSTONFS(dvp->v_mount);
8289         np = VTONFS(dvp);
8290         *laystatp = ENXIO;
8291         *unlockedp = 0;
8292         *nfhpp = NULL;
8293         *dpp = NULL;
8294         *attrflagp = 0;
8295         *dattrflagp = 0;
8296         if (namelen > NFS_MAXNAMLEN)
8297                 return (ENAMETOOLONG);
8298         NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp, cred);
8299         /*
8300          * For V4, this is actually an Open op.
8301          */
8302         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
8303         *tl++ = txdr_unsigned(owp->nfsow_seqid);
8304         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
8305             NFSV4OPEN_ACCESSREAD);
8306         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
8307         tsep = nfsmnt_mdssession(nmp);
8308         *tl++ = tsep->nfsess_clientid.lval[0];
8309         *tl = tsep->nfsess_clientid.lval[1];
8310         nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
8311         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8312         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
8313         if ((fmode & O_EXCL) != 0) {
8314                 if (NFSHASSESSPERSIST(nmp)) {
8315                         /* Use GUARDED for persistent sessions. */
8316                         *tl = txdr_unsigned(NFSCREATE_GUARDED);
8317                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
8318                 } else {
8319                         /* Otherwise, use EXCLUSIVE4_1. */
8320                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
8321                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
8322                         *tl++ = cverf.lval[0];
8323                         *tl = cverf.lval[1];
8324                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
8325                 }
8326         } else {
8327                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
8328                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
8329         }
8330         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8331         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
8332         nfsm_strtom(nd, name, namelen);
8333         /* Get the new file's handle and attributes, plus save the FH. */
8334         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
8335         *tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
8336         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
8337         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8338         NFSGETATTR_ATTRBIT(&attrbits);
8339         nfsrv_putattrbit(nd, &attrbits);
8340         /* Get the directory's post-op attributes. */
8341         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8342         *tl = txdr_unsigned(NFSV4OP_PUTFH);
8343         (void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
8344         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8345         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8346         nfsrv_putattrbit(nd, &attrbits);
8347         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8348         *tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
8349         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
8350         nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
8351             layouttype, layoutlen, usecurstateid);
8352         error = nfscl_request(nd, dvp, p, cred);
8353         if (error != 0)
8354                 return (error);
8355         NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
8356             error);
8357         if (nd->nd_repstat != 0)
8358                 *laystatp = nd->nd_repstat;
8359         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
8360         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8361                 NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
8362                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8363                     6 * NFSX_UNSIGNED);
8364                 stateid.seqid = *tl++;
8365                 stateid.other[0] = *tl++;
8366                 stateid.other[1] = *tl++;
8367                 stateid.other[2] = *tl;
8368                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8369                 if (error != 0)
8370                         goto nfsmout;
8371                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
8372                 deleg = fxdr_unsigned(int, *tl);
8373                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
8374                     deleg == NFSV4OPEN_DELEGATEWRITE) {
8375                         if (!(owp->nfsow_clp->nfsc_flags &
8376                               NFSCLFLAGS_FIRSTDELEG))
8377                                 owp->nfsow_clp->nfsc_flags |=
8378                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
8379                         dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
8380                             M_NFSCLDELEG, M_WAITOK);
8381                         LIST_INIT(&dp->nfsdl_owner);
8382                         LIST_INIT(&dp->nfsdl_lock);
8383                         dp->nfsdl_clp = owp->nfsow_clp;
8384                         newnfs_copyincred(cred, &dp->nfsdl_cred);
8385                         nfscl_lockinit(&dp->nfsdl_rwlock);
8386                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8387                             NFSX_UNSIGNED);
8388                         dp->nfsdl_stateid.seqid = *tl++;
8389                         dp->nfsdl_stateid.other[0] = *tl++;
8390                         dp->nfsdl_stateid.other[1] = *tl++;
8391                         dp->nfsdl_stateid.other[2] = *tl++;
8392                         ret = fxdr_unsigned(int, *tl);
8393                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
8394                                 dp->nfsdl_flags = NFSCLDL_WRITE;
8395                                 /*
8396                                  * Indicates how much the file can grow.
8397                                  */
8398                                 NFSM_DISSECT(tl, u_int32_t *,
8399                                     3 * NFSX_UNSIGNED);
8400                                 limitby = fxdr_unsigned(int, *tl++);
8401                                 switch (limitby) {
8402                                 case NFSV4OPEN_LIMITSIZE:
8403                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
8404                                         break;
8405                                 case NFSV4OPEN_LIMITBLOCKS:
8406                                         dp->nfsdl_sizelimit =
8407                                             fxdr_unsigned(u_int64_t, *tl++);
8408                                         dp->nfsdl_sizelimit *=
8409                                             fxdr_unsigned(u_int64_t, *tl);
8410                                         break;
8411                                 default:
8412                                         error = NFSERR_BADXDR;
8413                                         goto nfsmout;
8414                                 };
8415                         } else {
8416                                 dp->nfsdl_flags = NFSCLDL_READ;
8417                         }
8418                         if (ret != 0)
8419                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
8420                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
8421                             &ret, &acesize, p);
8422                         if (error != 0)
8423                                 goto nfsmout;
8424                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
8425                         error = NFSERR_BADXDR;
8426                         goto nfsmout;
8427                 }
8428
8429                 /* Now, we should have the status for the SaveFH. */
8430                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8431                 if (*++tl == 0) {
8432                         NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
8433                         /*
8434                          * Now, process the GetFH and Getattr for the newly
8435                          * created file. nfscl_mtofh() will set
8436                          * ND_NOMOREDATA if these weren't successful.
8437                          */
8438                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
8439                         NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
8440                         if (error != 0)
8441                                 goto nfsmout;
8442                 } else
8443                         nd->nd_flag |= ND_NOMOREDATA;
8444                 /* Now we have the PutFH and Getattr for the directory. */
8445                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8446                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8447                         if (*++tl != 0)
8448                                 nd->nd_flag |= ND_NOMOREDATA;
8449                         else {
8450                                 NFSM_DISSECT(tl, uint32_t *, 2 *
8451                                     NFSX_UNSIGNED);
8452                                 if (*++tl != 0)
8453                                         nd->nd_flag |= ND_NOMOREDATA;
8454                         }
8455                 }
8456                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8457                         /* Load the directory attributes. */
8458                         error = nfsm_loadattr(nd, dnap);
8459                         NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
8460                         if (error != 0)
8461                                 goto nfsmout;
8462                         *dattrflagp = 1;
8463                         if (dp != NULL && *attrflagp != 0) {
8464                                 dp->nfsdl_change = nnap->na_filerev;
8465                                 dp->nfsdl_modtime = nnap->na_mtime;
8466                                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
8467                         }
8468                         /*
8469                          * We can now complete the Open state.
8470                          */
8471                         nfhp = *nfhpp;
8472                         if (dp != NULL) {
8473                                 dp->nfsdl_fhlen = nfhp->nfh_len;
8474                                 NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
8475                                     nfhp->nfh_len);
8476                         }
8477                         /*
8478                          * Get an Open structure that will be
8479                          * attached to the OpenOwner, acquired already.
8480                          */
8481                         error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
8482                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
8483                             cred, p, NULL, &op, &newone, NULL, 0, false);
8484                         if (error != 0)
8485                                 goto nfsmout;
8486                         op->nfso_stateid = stateid;
8487                         newnfs_copyincred(cred, &op->nfso_cred);
8488
8489                         nfscl_openrelease(nmp, op, error, newone);
8490                         *unlockedp = 1;
8491
8492                         /* Now, handle the RestoreFH and LayoutGet. */
8493                         if (nd->nd_repstat == 0) {
8494                                 NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
8495                                 *laystatp = fxdr_unsigned(int, *(tl + 3));
8496                                 if (*laystatp == 0) {
8497                                         error = nfsrv_parselayoutget(nmp, nd,
8498                                             stateidp, retonclosep, flhp);
8499                                         if (error != 0)
8500                                                 *laystatp = error;
8501                                 }
8502                                 NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
8503                                     error);
8504                         } else
8505                                 nd->nd_repstat = 0;
8506                 }
8507         }
8508         if (nd->nd_repstat != 0 && error == 0)
8509                 error = nd->nd_repstat;
8510         if (error == NFSERR_STALECLIENTID)
8511                 nfscl_initiate_recovery(owp->nfsow_clp);
8512 nfsmout:
8513         NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
8514         if (error == 0)
8515                 *dpp = dp;
8516         else
8517                 free(dp, M_NFSCLDELEG);
8518         m_freem(nd->nd_mrep);
8519         return (error);
8520 }
8521
8522 /*
8523  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
8524  */
8525 static int
8526 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
8527     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
8528     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
8529     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
8530     int *dattrflagp, int *unlockedp)
8531 {
8532         struct nfscllayout *lyp;
8533         struct nfsclflayouthead flh;
8534         struct nfsfh *nfhp;
8535         struct nfsclsession *tsep;
8536         struct nfsmount *nmp;
8537         nfsv4stateid_t stateid;
8538         int error, layoutlen, layouttype, retonclose, laystat;
8539
8540         error = 0;
8541         nmp = VFSTONFS(dvp->v_mount);
8542         if (NFSHASFLEXFILE(nmp))
8543                 layouttype = NFSLAYOUT_FLEXFILE;
8544         else
8545                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
8546         LIST_INIT(&flh);
8547         tsep = nfsmnt_mdssession(nmp);
8548         layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
8549         error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
8550             owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
8551             unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
8552             &flh, &laystat);
8553         NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
8554             laystat, error);
8555         lyp = NULL;
8556         if (laystat == 0) {
8557                 nfhp = *nfhpp;
8558                 laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
8559                     nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
8560                     layouttype, laystat, NULL, cred, p);
8561         } else
8562                 laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
8563                     retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
8564                     cred, p);
8565         if (laystat == 0)
8566                 nfscl_rellayout(lyp, 0);
8567         return (error);
8568 }
8569
8570 /*
8571  * Process the results of a layoutget() operation.
8572  */
8573 static int
8574 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
8575     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
8576     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
8577     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
8578 {
8579         struct nfsclflayout *tflp;
8580         struct nfscldevinfo *dip;
8581         uint8_t *dev;
8582         int i, mirrorcnt;
8583
8584         if (laystat == NFSERR_UNKNLAYOUTTYPE) {
8585                 NFSLOCKMNT(nmp);
8586                 if (!NFSHASFLEXFILE(nmp)) {
8587                         /* Switch to using Flex File Layout. */
8588                         nmp->nm_state |= NFSSTA_FLEXFILE;
8589                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
8590                         /* Disable pNFS. */
8591                         NFSCL_DEBUG(1, "disable PNFS\n");
8592                         nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
8593                 }
8594                 NFSUNLOCKMNT(nmp);
8595         }
8596         if (laystat == 0) {
8597                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
8598                 LIST_FOREACH(tflp, flhp, nfsfl_list) {
8599                         if (layouttype == NFSLAYOUT_FLEXFILE)
8600                                 mirrorcnt = tflp->nfsfl_mirrorcnt;
8601                         else
8602                                 mirrorcnt = 1;
8603                         for (i = 0; i < mirrorcnt; i++) {
8604                                 laystat = nfscl_adddevinfo(nmp, NULL, i, tflp);
8605                                 NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
8606                                 if (laystat != 0) {
8607                                         if (layouttype == NFSLAYOUT_FLEXFILE)
8608                                                 dev = tflp->nfsfl_ffm[i].dev;
8609                                         else
8610                                                 dev = tflp->nfsfl_dev;
8611                                         laystat = nfsrpc_getdeviceinfo(nmp, dev,
8612                                             layouttype, notifybit, &dip, cred,
8613                                             p);
8614                                         NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
8615                                             laystat);
8616                                         if (laystat != 0)
8617                                                 goto out;
8618                                         laystat = nfscl_adddevinfo(nmp, dip, i,
8619                                             tflp);
8620                                         if (laystat != 0)
8621                                                 printf("nfsrpc_layoutgetresout"
8622                                                     ": cannot add\n");
8623                                 }
8624                         }
8625                 }
8626         }
8627 out:
8628         if (laystat == 0) {
8629                 /*
8630                  * nfscl_layout() always returns with the nfsly_lock
8631                  * set to a refcnt (shared lock).
8632                  * Passing in dvp is sufficient, since it is only used to
8633                  * get the fsid for the file system.
8634                  */
8635                 laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
8636                     layouttype, retonclose, flhp, lypp, cred, p);
8637                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
8638                     laystat);
8639                 if (laystat == 0 && islockedp != NULL)
8640                         *islockedp = 1;
8641         }
8642         return (laystat);
8643 }
8644
8645 /*
8646  * nfs copy_file_range operation.
8647  */
8648 int
8649 nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp,
8650     off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp,
8651     struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap,
8652     struct ucred *cred, bool consecutive, bool *must_commitp)
8653 {
8654         int commit, error, expireret = 0, retrycnt;
8655         u_int32_t clidrev = 0;
8656         struct nfsmount *nmp = VFSTONFS(invp->v_mount);
8657         struct nfsfh *innfhp = NULL, *outnfhp = NULL;
8658         nfsv4stateid_t instateid, outstateid;
8659         void *inlckp, *outlckp;
8660
8661         if (nmp->nm_clp != NULL)
8662                 clidrev = nmp->nm_clp->nfsc_clientidrev;
8663         innfhp = VTONFS(invp)->n_fhp;
8664         outnfhp = VTONFS(outvp)->n_fhp;
8665         retrycnt = 0;
8666         do {
8667                 /* Get both stateids. */
8668                 inlckp = NULL;
8669                 nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len,
8670                     NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid,
8671                     &inlckp);
8672                 outlckp = NULL;
8673                 nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len,
8674                     NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid,
8675                     &outlckp);
8676
8677                 error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp,
8678                     &instateid, &outstateid, innap, inattrflagp, outnap,
8679                     outattrflagp, consecutive, &commit, cred, curthread);
8680                 if (error == 0) {
8681                         if (commit != NFSWRITE_FILESYNC)
8682                                 *must_commitp = true;
8683                         *inoffp += *lenp;
8684                         *outoffp += *lenp;
8685                 } else if (error == NFSERR_STALESTATEID)
8686                         nfscl_initiate_recovery(nmp->nm_clp);
8687                 if (inlckp != NULL)
8688                         nfscl_lockderef(inlckp);
8689                 if (outlckp != NULL)
8690                         nfscl_lockderef(outlckp);
8691                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8692                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8693                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8694                         (void) nfs_catnap(PZERO, error, "nfs_cfr");
8695                 } else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
8696                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
8697                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8698                             curthread);
8699                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
8700                         error = EIO;
8701                 }
8702                 retrycnt++;
8703         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
8704             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8705               error == NFSERR_STALEDONTRECOVER ||
8706             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8707             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8708              expireret == 0 && clidrev != 0 && retrycnt < 4));
8709         if (error != 0 && (retrycnt >= 4 ||
8710             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8711               error == NFSERR_STALEDONTRECOVER))
8712                 error = EIO;
8713         return (error);
8714 }
8715
8716 /*
8717  * The copy RPC.
8718  */
8719 static int
8720 nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff,
8721     size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp,
8722     struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap,
8723     int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred,
8724     NFSPROC_T *p)
8725 {
8726         uint32_t *tl, *opcntp;
8727         int error;
8728         struct nfsrv_descript nfsd;
8729         struct nfsrv_descript *nd = &nfsd;
8730         struct nfsmount *nmp;
8731         nfsattrbit_t attrbits;
8732         struct vattr va;
8733         uint64_t len;
8734
8735         nmp = VFSTONFS(invp->v_mount);
8736         *inattrflagp = *outattrflagp = 0;
8737         *commitp = NFSWRITE_UNSTABLE;
8738         len = *lenp;
8739         *lenp = 0;
8740         if (len > nfs_maxcopyrange)
8741                 len = nfs_maxcopyrange;
8742         nfscl_reqstart(nd, NFSPROC_COPY, nmp, VTONFS(invp)->n_fhp->nfh_fh,
8743             VTONFS(invp)->n_fhp->nfh_len, &opcntp, NULL, 0, 0, cred);
8744         /*
8745          * First do a Setattr of atime to the server's clock
8746          * time.  The FreeBSD "collective" was of the opinion
8747          * that setting atime was necessary for this syscall.
8748          * Do the Setattr before the Copy, so that it can be
8749          * handled well if the server replies NFSERR_DELAY to
8750          * the Setattr operation.
8751          */
8752         if ((nmp->nm_mountp->mnt_flag & MNT_NOATIME) == 0) {
8753                 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8754                 *tl = txdr_unsigned(NFSV4OP_SETATTR);
8755                 nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
8756                 VATTR_NULL(&va);
8757                 va.va_atime.tv_sec = va.va_atime.tv_nsec = 0;
8758                 va.va_vaflags = VA_UTIMES_NULL;
8759                 nfscl_fillsattr(nd, &va, invp, 0, 0);
8760                 /* Bump opcnt from 7 to 8. */
8761                 *opcntp = txdr_unsigned(8);
8762         }
8763
8764         /* Now Getattr the invp attributes. */
8765         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8766         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8767         NFSGETATTR_ATTRBIT(&attrbits);
8768         nfsrv_putattrbit(nd, &attrbits);
8769
8770         /* Set outvp. */
8771         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8772         *tl = txdr_unsigned(NFSV4OP_PUTFH);
8773         (void)nfsm_fhtom(nmp, nd, VTONFS(outvp)->n_fhp->nfh_fh,
8774             VTONFS(outvp)->n_fhp->nfh_len, 0);
8775
8776         /* Do the Copy. */
8777         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8778         *tl = txdr_unsigned(NFSV4OP_COPY);
8779         nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
8780         nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID);
8781         NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED);
8782         txdr_hyper(inoff, tl); tl += 2;
8783         txdr_hyper(outoff, tl); tl += 2;
8784         txdr_hyper(len, tl); tl += 2;
8785         if (consecutive)
8786                 *tl++ = newnfs_true;
8787         else
8788                 *tl++ = newnfs_false;
8789         *tl++ = newnfs_true;
8790         *tl++ = 0;
8791
8792         /* Get the outvp attributes. */
8793         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8794         NFSWRITEGETATTR_ATTRBIT(&attrbits);
8795         nfsrv_putattrbit(nd, &attrbits);
8796
8797         error = nfscl_request(nd, invp, p, cred);
8798         if (error != 0)
8799                 return (error);
8800         /* Skip over the Setattr reply. */
8801         if ((nd->nd_flag & ND_NOMOREDATA) == 0 &&
8802             (nmp->nm_mountp->mnt_flag & MNT_NOATIME) == 0) {
8803                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8804                 if (*(tl + 1) == 0) {
8805                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8806                         if (error != 0)
8807                                 goto nfsmout;
8808                 } else
8809                         nd->nd_flag |= ND_NOMOREDATA;
8810         }
8811         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8812                 /* Get the input file's attributes. */
8813                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8814                 if (*(tl + 1) == 0) {
8815                         error = nfsm_loadattr(nd, innap);
8816                         if (error != 0)
8817                                 goto nfsmout;
8818                         *inattrflagp = 1;
8819                 } else
8820                         nd->nd_flag |= ND_NOMOREDATA;
8821         }
8822         /* Skip over return stat for PutFH. */
8823         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8824                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8825                 if (*++tl != 0)
8826                         nd->nd_flag |= ND_NOMOREDATA;
8827         }
8828         /* Skip over return stat for Copy. */
8829         if ((nd->nd_flag & ND_NOMOREDATA) == 0)
8830                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8831         if (nd->nd_repstat == 0) {
8832                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8833                 if (*tl != 0) {
8834                         /* There should be no callback ids. */
8835                         error = NFSERR_BADXDR;
8836                         goto nfsmout;
8837                 }
8838                 NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED +
8839                     NFSX_VERF);
8840                 len = fxdr_hyper(tl); tl += 2;
8841                 *commitp = fxdr_unsigned(int, *tl++);
8842                 NFSLOCKMNT(nmp);
8843                 if (!NFSHASWRITEVERF(nmp)) {
8844                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8845                         NFSSETWRITEVERF(nmp);
8846                 } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
8847                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8848                         nd->nd_repstat = NFSERR_STALEWRITEVERF;
8849                 }
8850                 NFSUNLOCKMNT(nmp);
8851                 tl += (NFSX_VERF / NFSX_UNSIGNED);
8852                 if (nd->nd_repstat == 0 && *++tl != newnfs_true)
8853                         /* Must be a synchronous copy. */
8854                         nd->nd_repstat = NFSERR_NOTSUPP;
8855                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8856                 error = nfsm_loadattr(nd, outnap);
8857                 if (error == 0)
8858                         *outattrflagp = NFS_LATTR_NOSHRINK;
8859                 if (nd->nd_repstat == 0)
8860                         *lenp = len;
8861         } else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) {
8862                 /*
8863                  * For the case where consecutive is not supported, but
8864                  * synchronous is supported, we can try consecutive == false
8865                  * by returning this error.  Otherwise, return NFSERR_NOTSUPP,
8866                  * since Copy cannot be done.
8867                  */
8868                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8869                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8870                         if (!consecutive || *++tl == newnfs_false)
8871                                 nd->nd_repstat = NFSERR_NOTSUPP;
8872                 } else
8873                         nd->nd_repstat = NFSERR_BADXDR;
8874         }
8875         if (error == 0)
8876                 error = nd->nd_repstat;
8877 nfsmout:
8878         m_freem(nd->nd_mrep);
8879         return (error);
8880 }
8881
8882 /*
8883  * Seek operation.
8884  */
8885 int
8886 nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content,
8887     struct ucred *cred, struct nfsvattr *nap, int *attrflagp)
8888 {
8889         int error, expireret = 0, retrycnt;
8890         u_int32_t clidrev = 0;
8891         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
8892         struct nfsnode *np = VTONFS(vp);
8893         struct nfsfh *nfhp = NULL;
8894         nfsv4stateid_t stateid;
8895         void *lckp;
8896
8897         if (nmp->nm_clp != NULL)
8898                 clidrev = nmp->nm_clp->nfsc_clientidrev;
8899         nfhp = np->n_fhp;
8900         retrycnt = 0;
8901         do {
8902                 lckp = NULL;
8903                 nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
8904                     NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp);
8905                 error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content,
8906                     nap, attrflagp, cred);
8907                 if (error == NFSERR_STALESTATEID)
8908                         nfscl_initiate_recovery(nmp->nm_clp);
8909                 if (lckp != NULL)
8910                         nfscl_lockderef(lckp);
8911                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8912                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8913                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8914                         (void) nfs_catnap(PZERO, error, "nfs_seek");
8915                 } else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
8916                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
8917                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8918                             curthread);
8919                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
8920                         error = EIO;
8921                 }
8922                 retrycnt++;
8923         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8924             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8925             error == NFSERR_BADSESSION ||
8926             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8927             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8928              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
8929             (error == NFSERR_OPENMODE && retrycnt < 4));
8930         if (error && retrycnt >= 4)
8931                 error = EIO;
8932         return (error);
8933 }
8934
8935 /*
8936  * The seek RPC.
8937  */
8938 static int
8939 nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp,
8940     int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred)
8941 {
8942         uint32_t *tl;
8943         int error;
8944         struct nfsrv_descript nfsd;
8945         struct nfsrv_descript *nd = &nfsd;
8946         nfsattrbit_t attrbits;
8947
8948         *attrflagp = 0;
8949         NFSCL_REQSTART(nd, NFSPROC_SEEK, vp, cred);
8950         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
8951         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8952         txdr_hyper(*offp, tl); tl += 2;
8953         *tl++ = txdr_unsigned(content);
8954         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8955         NFSGETATTR_ATTRBIT(&attrbits);
8956         nfsrv_putattrbit(nd, &attrbits);
8957         error = nfscl_request(nd, vp, curthread, cred);
8958         if (error != 0)
8959                 return (error);
8960         if (nd->nd_repstat == 0) {
8961                 NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER);
8962                 if (*tl++ == newnfs_true)
8963                         *eofp = true;
8964                 else
8965                         *eofp = false;
8966                 *offp = fxdr_hyper(tl);
8967                 /* Just skip over Getattr op status. */
8968                 error = nfsm_loadattr(nd, nap);
8969                 if (error == 0)
8970                         *attrflagp = 1;
8971         }
8972         error = nd->nd_repstat;
8973 nfsmout:
8974         m_freem(nd->nd_mrep);
8975         return (error);
8976 }
8977
8978 /*
8979  * The getextattr RPC.
8980  */
8981 int
8982 nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp,
8983     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8984 {
8985         uint32_t *tl;
8986         int error;
8987         struct nfsrv_descript nfsd;
8988         struct nfsrv_descript *nd = &nfsd;
8989         nfsattrbit_t attrbits;
8990         uint32_t len, len2;
8991
8992         *attrflagp = 0;
8993         NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp, cred);
8994         nfsm_strtom(nd, name, strlen(name));
8995         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8996         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8997         NFSGETATTR_ATTRBIT(&attrbits);
8998         nfsrv_putattrbit(nd, &attrbits);
8999         error = nfscl_request(nd, vp, p, cred);
9000         if (error != 0)
9001                 return (error);
9002         if (nd->nd_repstat == 0) {
9003                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
9004                 len = fxdr_unsigned(uint32_t, *tl);
9005                 /* Sanity check lengths. */
9006                 if (uiop != NULL && len > 0 && len <= IOSIZE_MAX &&
9007                     uiop->uio_resid <= UINT32_MAX) {
9008                         len2 = uiop->uio_resid;
9009                         if (len2 >= len)
9010                                 error = nfsm_mbufuio(nd, uiop, len);
9011                         else {
9012                                 error = nfsm_mbufuio(nd, uiop, len2);
9013                                 if (error == 0) {
9014                                         /*
9015                                          * nfsm_mbufuio() advances to a multiple
9016                                          * of 4, so round up len2 as well.  Then
9017                                          * we need to advance over the rest of
9018                                          * the data, rounding up the remaining
9019                                          * length.
9020                                          */
9021                                         len2 = NFSM_RNDUP(len2);
9022                                         len2 = NFSM_RNDUP(len - len2);
9023                                         if (len2 > 0)
9024                                                 error = nfsm_advance(nd, len2,
9025                                                     -1);
9026                                 }
9027                         }
9028                 } else if (uiop == NULL && len > 0) {
9029                         /* Just wants the length and not the data. */
9030                         error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
9031                 } else if (len > 0)
9032                         error = ENOATTR;
9033                 if (error != 0)
9034                         goto nfsmout;
9035                 *lenp = len;
9036                 /* Just skip over Getattr op status. */
9037                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
9038                 error = nfsm_loadattr(nd, nap);
9039                 if (error == 0)
9040                         *attrflagp = 1;
9041         }
9042         if (error == 0)
9043                 error = nd->nd_repstat;
9044 nfsmout:
9045         m_freem(nd->nd_mrep);
9046         return (error);
9047 }
9048
9049 /*
9050  * The setextattr RPC.
9051  */
9052 int
9053 nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop,
9054     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
9055 {
9056         uint32_t *tl;
9057         int error;
9058         struct nfsrv_descript nfsd;
9059         struct nfsrv_descript *nd = &nfsd;
9060         nfsattrbit_t attrbits;
9061
9062         *attrflagp = 0;
9063         NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp, cred);
9064         if (uiop->uio_resid > nd->nd_maxreq) {
9065                 /* nd_maxreq is set by NFSCL_REQSTART(). */
9066                 m_freem(nd->nd_mreq);
9067                 return (EINVAL);
9068         }
9069         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9070         *tl = txdr_unsigned(NFSV4SXATTR_EITHER);
9071         nfsm_strtom(nd, name, strlen(name));
9072         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9073         *tl = txdr_unsigned(uiop->uio_resid);
9074         error = nfsm_uiombuf(nd, uiop, uiop->uio_resid);
9075         if (error != 0) {
9076                 m_freem(nd->nd_mreq);
9077                 return (error);
9078         }
9079         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9080         *tl = txdr_unsigned(NFSV4OP_GETATTR);
9081         NFSGETATTR_ATTRBIT(&attrbits);
9082         nfsrv_putattrbit(nd, &attrbits);
9083         error = nfscl_request(nd, vp, p, cred);
9084         if (error != 0)
9085                 return (error);
9086         if (nd->nd_repstat == 0) {
9087                 /* Just skip over the reply and Getattr op status. */
9088                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
9089                     NFSX_UNSIGNED);
9090                 error = nfsm_loadattr(nd, nap);
9091                 if (error == 0)
9092                         *attrflagp = 1;
9093         }
9094         if (error == 0)
9095                 error = nd->nd_repstat;
9096 nfsmout:
9097         m_freem(nd->nd_mrep);
9098         return (error);
9099 }
9100
9101 /*
9102  * The removeextattr RPC.
9103  */
9104 int
9105 nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap,
9106     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
9107 {
9108         uint32_t *tl;
9109         int error;
9110         struct nfsrv_descript nfsd;
9111         struct nfsrv_descript *nd = &nfsd;
9112         nfsattrbit_t attrbits;
9113
9114         *attrflagp = 0;
9115         NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp, cred);
9116         nfsm_strtom(nd, name, strlen(name));
9117         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9118         *tl = txdr_unsigned(NFSV4OP_GETATTR);
9119         NFSGETATTR_ATTRBIT(&attrbits);
9120         nfsrv_putattrbit(nd, &attrbits);
9121         error = nfscl_request(nd, vp, p, cred);
9122         if (error != 0)
9123                 return (error);
9124         if (nd->nd_repstat == 0) {
9125                 /* Just skip over the reply and Getattr op status. */
9126                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
9127                     NFSX_UNSIGNED);
9128                 error = nfsm_loadattr(nd, nap);
9129                 if (error == 0)
9130                         *attrflagp = 1;
9131         }
9132         if (error == 0)
9133                 error = nd->nd_repstat;
9134 nfsmout:
9135         m_freem(nd->nd_mrep);
9136         return (error);
9137 }
9138
9139 /*
9140  * The listextattr RPC.
9141  */
9142 int
9143 nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop,
9144     size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp,
9145     struct ucred *cred, NFSPROC_T *p)
9146 {
9147         uint32_t *tl;
9148         int cnt, error, i, len;
9149         struct nfsrv_descript nfsd;
9150         struct nfsrv_descript *nd = &nfsd;
9151         nfsattrbit_t attrbits;
9152         u_char c;
9153
9154         *attrflagp = 0;
9155         NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp, cred);
9156         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
9157         txdr_hyper(*cookiep, tl); tl += 2;
9158         *tl++ = txdr_unsigned(*lenp);
9159         *tl = txdr_unsigned(NFSV4OP_GETATTR);
9160         NFSGETATTR_ATTRBIT(&attrbits);
9161         nfsrv_putattrbit(nd, &attrbits);
9162         error = nfscl_request(nd, vp, p, cred);
9163         if (error != 0)
9164                 return (error);
9165         *eofp = true;
9166         *lenp = 0;
9167         if (nd->nd_repstat == 0) {
9168                 NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
9169                 *cookiep = fxdr_hyper(tl); tl += 2;
9170                 cnt = fxdr_unsigned(int, *tl);
9171                 if (cnt < 0) {
9172                         error = EBADRPC;
9173                         goto nfsmout;
9174                 }
9175                 for (i = 0; i < cnt; i++) {
9176                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
9177                         len = fxdr_unsigned(int, *tl);
9178                         if (len <= 0 || len > EXTATTR_MAXNAMELEN) {
9179                                 error = EBADRPC;
9180                                 goto nfsmout;
9181                         }
9182                         if (uiop == NULL)
9183                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
9184                         else if (uiop->uio_resid >= len + 1) {
9185                                 c = len;
9186                                 error = uiomove(&c, sizeof(c), uiop);
9187                                 if (error == 0)
9188                                         error = nfsm_mbufuio(nd, uiop, len);
9189                         } else {
9190                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
9191                                 *eofp = false;
9192                         }
9193                         if (error != 0)
9194                                 goto nfsmout;
9195                         *lenp += (len + 1);
9196                 }
9197                 /* Get the eof and skip over the Getattr op status. */
9198                 NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
9199                 /*
9200                  * *eofp is set false above, because it wasn't able to copy
9201                  * all of the reply.
9202                  */
9203                 if (*eofp && *tl == 0)
9204                         *eofp = false;
9205                 error = nfsm_loadattr(nd, nap);
9206                 if (error == 0)
9207                         *attrflagp = 1;
9208         }
9209         if (error == 0)
9210                 error = nd->nd_repstat;
9211 nfsmout:
9212         m_freem(nd->nd_mrep);
9213         return (error);
9214 }
9215
9216 /*
9217  * Split an mbuf list.  For non-M_EXTPG mbufs, just use m_split().
9218  */
9219 static struct mbuf *
9220 nfsm_split(struct mbuf *mp, uint64_t xfer)
9221 {
9222         struct mbuf *m, *m2;
9223         vm_page_t pg;
9224         int i, j, left, pgno, plen, trim;
9225         char *cp, *cp2;
9226
9227         if ((mp->m_flags & M_EXTPG) == 0) {
9228                 m = m_split(mp, xfer, M_WAITOK);
9229                 return (m);
9230         }
9231
9232         /* Find the correct mbuf to split at. */
9233         for (m = mp; m != NULL && xfer > m->m_len; m = m->m_next)
9234                 xfer -= m->m_len;
9235         if (m == NULL)
9236                 return (NULL);
9237
9238         /* If xfer == m->m_len, we can just split the mbuf list. */
9239         if (xfer == m->m_len) {
9240                 m2 = m->m_next;
9241                 m->m_next = NULL;
9242                 return (m2);
9243         }
9244
9245         /* Find the page to split at. */
9246         pgno = 0;
9247         left = xfer;
9248         do {
9249                 if (pgno == 0)
9250                         plen = m_epg_pagelen(m, 0, m->m_epg_1st_off);
9251                 else
9252                         plen = m_epg_pagelen(m, pgno, 0);
9253                 if (left <= plen)
9254                         break;
9255                 left -= plen;
9256                 pgno++;
9257         } while (pgno < m->m_epg_npgs);
9258         if (pgno == m->m_epg_npgs)
9259                 panic("nfsm_split: eroneous ext_pgs mbuf");
9260
9261         m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
9262         m2->m_epg_flags |= EPG_FLAG_ANON;
9263
9264         /*
9265          * If left < plen, allocate a new page for the new mbuf
9266          * and copy the data after left in the page to this new
9267          * page.
9268          */
9269         if (left < plen) {
9270                 pg = vm_page_alloc_noobj(VM_ALLOC_WAITOK | VM_ALLOC_NODUMP |
9271                     VM_ALLOC_WIRED);
9272                 m2->m_epg_pa[0] = VM_PAGE_TO_PHYS(pg);
9273                 m2->m_epg_npgs = 1;
9274
9275                 /* Copy the data after left to the new page. */
9276                 trim = plen - left;
9277                 cp = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
9278                 if (pgno == 0)
9279                         cp += m->m_epg_1st_off;
9280                 cp += left;
9281                 cp2 = (char *)(void *)PHYS_TO_DMAP(m2->m_epg_pa[0]);
9282                 if (pgno == m->m_epg_npgs - 1)
9283                         m2->m_epg_last_len = trim;
9284                 else {
9285                         cp2 += PAGE_SIZE - trim;
9286                         m2->m_epg_1st_off = PAGE_SIZE - trim;
9287                         m2->m_epg_last_len = m->m_epg_last_len;
9288                 }
9289                 memcpy(cp2, cp, trim);
9290                 m2->m_len = trim;
9291         } else {
9292                 m2->m_len = 0;
9293                 m2->m_epg_last_len = m->m_epg_last_len;
9294         }
9295
9296         /* Move the pages beyond pgno to the new mbuf. */
9297         for (i = pgno + 1, j = m2->m_epg_npgs; i < m->m_epg_npgs; i++, j++) {
9298                 m2->m_epg_pa[j] = m->m_epg_pa[i];
9299                 /* Never moves page 0. */
9300                 m2->m_len += m_epg_pagelen(m, i, 0);
9301         }
9302         m2->m_epg_npgs = j;
9303         m->m_epg_npgs = pgno + 1;
9304         m->m_epg_last_len = left;
9305         m->m_len = xfer;
9306
9307         m2->m_next = m->m_next;
9308         m->m_next = NULL;
9309         return (m2);
9310 }
9311
9312 /*
9313  * Do the NFSv4.1 Bind Connection to Session.
9314  * Called from the reconnect layer of the krpc (sys/rpc/clnt_rc.c).
9315  */
9316 void
9317 nfsrpc_bindconnsess(CLIENT *cl, void *arg, struct ucred *cr)
9318 {
9319         struct nfscl_reconarg *rcp = (struct nfscl_reconarg *)arg;
9320         uint32_t res, *tl;
9321         struct nfsrv_descript nfsd;
9322         struct nfsrv_descript *nd = &nfsd;
9323         struct rpc_callextra ext;
9324         struct timeval utimeout;
9325         enum clnt_stat stat;
9326         int error;
9327
9328         nfscl_reqstart(nd, NFSPROC_BINDCONNTOSESS, NULL, NULL, 0, NULL, NULL,
9329             NFS_VER4, rcp->minorvers, NULL);
9330         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED);
9331         memcpy(tl, rcp->sessionid, NFSX_V4SESSIONID);
9332         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9333         *tl++ = txdr_unsigned(NFSCDFC4_FORE_OR_BOTH);
9334         *tl = newnfs_false;
9335
9336         memset(&ext, 0, sizeof(ext));
9337         utimeout.tv_sec = 30;
9338         utimeout.tv_usec = 0;
9339         ext.rc_auth = authunix_create(cr);
9340         nd->nd_mrep = NULL;
9341         stat = CLNT_CALL_MBUF(cl, &ext, NFSV4PROC_COMPOUND, nd->nd_mreq,
9342             &nd->nd_mrep, utimeout);
9343         AUTH_DESTROY(ext.rc_auth);
9344         if (stat != RPC_SUCCESS) {
9345                 printf("nfsrpc_bindconnsess: call failed stat=%d\n", stat);
9346                 return;
9347         }
9348         if (nd->nd_mrep == NULL) {
9349                 printf("nfsrpc_bindconnsess: no reply args\n");
9350                 return;
9351         }
9352         error = 0;
9353         newnfs_realign(&nd->nd_mrep, M_WAITOK);
9354         nd->nd_md = nd->nd_mrep;
9355         nd->nd_dpos = mtod(nd->nd_md, char *);
9356         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
9357         nd->nd_repstat = fxdr_unsigned(uint32_t, *tl++);
9358         if (nd->nd_repstat == NFSERR_OK) {
9359                 res = fxdr_unsigned(uint32_t, *tl);
9360                 if (res > 0 && (error = nfsm_advance(nd, NFSM_RNDUP(res),
9361                     -1)) != 0)
9362                         goto nfsmout;
9363                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
9364                     4 * NFSX_UNSIGNED);
9365                 tl += 3;
9366                 if (!NFSBCMP(tl, rcp->sessionid, NFSX_V4SESSIONID)) {
9367                         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9368                         res = fxdr_unsigned(uint32_t, *tl);
9369                         if (res != NFSCDFS4_BOTH)
9370                                 printf("nfsrpc_bindconnsess: did not "
9371                                     "return FS4_BOTH\n");
9372                 } else
9373                         printf("nfsrpc_bindconnsess: not same "
9374                             "sessionid\n");
9375         } else if (nd->nd_repstat != NFSERR_BADSESSION)
9376                 printf("nfsrpc_bindconnsess: returned %d\n", nd->nd_repstat);
9377 nfsmout:
9378         if (error != 0)
9379                 printf("nfsrpc_bindconnsess: reply bad xdr\n");
9380         m_freem(nd->nd_mrep);
9381 }
9382
9383 /*
9384  * Do roughly what nfs_statfs() does for NFSv4, but when called with a shared
9385  * locked vnode.
9386  */
9387 static void
9388 nfscl_statfs(struct vnode *vp, struct ucred *cred, NFSPROC_T *td)
9389 {
9390         struct nfsvattr nfsva;
9391         struct nfsfsinfo fs;
9392         struct nfsstatfs sb;
9393         struct mount *mp;
9394         struct nfsmount *nmp;
9395         uint32_t lease;
9396         int attrflag, error;
9397
9398         mp = vp->v_mount;
9399         nmp = VFSTONFS(mp);
9400         error = nfsrpc_statfs(vp, &sb, &fs, &lease, cred, td, &nfsva,
9401             &attrflag);
9402         if (attrflag != 0)
9403                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
9404         if (error == 0) {
9405                 NFSLOCKCLSTATE();
9406                 if (nmp->nm_clp != NULL)
9407                         nmp->nm_clp->nfsc_renew = NFSCL_RENEW(lease);
9408                 NFSUNLOCKCLSTATE();
9409                 mtx_lock(&nmp->nm_mtx);
9410                 nfscl_loadfsinfo(nmp, &fs);
9411                 nfscl_loadsbinfo(nmp, &sb, &mp->mnt_stat);
9412                 mp->mnt_stat.f_iosize = newnfs_iosize(nmp);
9413                 mtx_unlock(&nmp->nm_mtx);
9414         }
9415 }