network - Tokenize NFS, fix MP races
[dragonfly.git] / sys / vfs / nfs / nfs_subs.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
79e5012e 37 * $FreeBSD: /repoman/r/ncvs/src/sys/nfsclient/nfs_subs.c,v 1.128 2004/04/14 23:23:55 peadar Exp $
67863d04 38 * $DragonFly: src/sys/vfs/nfs/nfs_subs.c,v 1.48 2008/09/17 21:44:24 dillon Exp $
984263bc
MD
39 */
40
41/*
42 * These functions support the macros and help fiddle mbuf chains for
43 * the nfs op functions. They do things like create the rpc header and
44 * copy data between mbuf chains and uio lists.
45 */
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/kernel.h>
49#include <sys/buf.h>
50#include <sys/proc.h>
51#include <sys/mount.h>
52#include <sys/vnode.h>
fad57d0e 53#include <sys/nlookup.h>
984263bc
MD
54#include <sys/namei.h>
55#include <sys/mbuf.h>
56#include <sys/socket.h>
57#include <sys/stat.h>
58#include <sys/malloc.h>
59#include <sys/sysent.h>
60#include <sys/syscall.h>
61#include <sys/conf.h>
70aac194 62#include <sys/objcache.h>
984263bc
MD
63
64#include <vm/vm.h>
65#include <vm/vm_object.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_zone.h>
68
3020e3be
MD
69#include <sys/buf2.h>
70
1f2de5d4
MD
71#include "rpcv2.h"
72#include "nfsproto.h"
73#include "nfs.h"
c1cf1e59 74#include "nfsmount.h"
1f2de5d4
MD
75#include "nfsnode.h"
76#include "xdr_subs.h"
77#include "nfsm_subs.h"
1f2de5d4 78#include "nfsrtt.h"
984263bc
MD
79
80#include <netinet/in.h>
81
82/*
83 * Data items converted to xdr at startup, since they are constant
84 * This is kinda hokey, but may save a little time doing byte swaps
85 */
86u_int32_t nfs_xdrneg1;
e97453f3
MD
87u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers;
88u_int32_t rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr;
89u_int32_t rpc_auth_kerb;
e07fef60 90u_int32_t nfs_prog, nfs_true, nfs_false;
984263bc
MD
91
92/* And other global data */
984263bc
MD
93static enum vtype nv2tov_type[8]= {
94 VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON
95};
96enum vtype nv3tov_type[8]= {
97 VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO
98};
99
100int nfs_ticks;
e97453f3 101
c6b43e93
MD
102/*
103 * Protect master lists only. Primary protection uses the per-mount
104 * and per nfssvc_sock tokens.
105 */
106struct lwkt_token nfs_token = LWKT_TOKEN_MP_INITIALIZER(unp_token);
107
e97453f3 108static int nfs_pbuf_freecnt = -1; /* start out unlimited */
984263bc 109
49433307 110struct nfsmount_head nfs_mountq = TAILQ_HEAD_INITIALIZER(nfs_mountq);
984263bc
MD
111struct nfssvc_sockhead nfssvc_sockhead;
112int nfssvc_sockhead_flag;
113struct nfsd_head nfsd_head;
114int nfsd_head_flag;
115struct nfs_bufq nfs_bufq;
984263bc
MD
116struct nqfhhashhead *nqfhhashtbl;
117u_long nqfhhash;
118
984263bc
MD
119static int nfs_prev_nfssvc_sy_narg;
120static sy_call_t *nfs_prev_nfssvc_sy_call;
121
122#ifndef NFS_NOSERVER
123
984263bc
MD
124/*
125 * Mapping of old NFS Version 2 RPC numbers to generic numbers.
126 */
127int nfsv3_procid[NFS_NPROCS] = {
128 NFSPROC_NULL,
129 NFSPROC_GETATTR,
130 NFSPROC_SETATTR,
131 NFSPROC_NOOP,
132 NFSPROC_LOOKUP,
133 NFSPROC_READLINK,
134 NFSPROC_READ,
135 NFSPROC_NOOP,
136 NFSPROC_WRITE,
137 NFSPROC_CREATE,
138 NFSPROC_REMOVE,
139 NFSPROC_RENAME,
140 NFSPROC_LINK,
141 NFSPROC_SYMLINK,
142 NFSPROC_MKDIR,
143 NFSPROC_RMDIR,
144 NFSPROC_READDIR,
145 NFSPROC_FSSTAT,
146 NFSPROC_NOOP,
147 NFSPROC_NOOP,
148 NFSPROC_NOOP,
149 NFSPROC_NOOP,
150 NFSPROC_NOOP,
151 NFSPROC_NOOP,
152 NFSPROC_NOOP,
153 NFSPROC_NOOP
154};
155
156#endif /* NFS_NOSERVER */
157/*
158 * and the reverse mapping from generic to Version 2 procedure numbers
159 */
160int nfsv2_procid[NFS_NPROCS] = {
161 NFSV2PROC_NULL,
162 NFSV2PROC_GETATTR,
163 NFSV2PROC_SETATTR,
164 NFSV2PROC_LOOKUP,
165 NFSV2PROC_NOOP,
166 NFSV2PROC_READLINK,
167 NFSV2PROC_READ,
168 NFSV2PROC_WRITE,
169 NFSV2PROC_CREATE,
170 NFSV2PROC_MKDIR,
171 NFSV2PROC_SYMLINK,
172 NFSV2PROC_CREATE,
173 NFSV2PROC_REMOVE,
174 NFSV2PROC_RMDIR,
175 NFSV2PROC_RENAME,
176 NFSV2PROC_LINK,
177 NFSV2PROC_READDIR,
178 NFSV2PROC_NOOP,
179 NFSV2PROC_STATFS,
180 NFSV2PROC_NOOP,
181 NFSV2PROC_NOOP,
182 NFSV2PROC_NOOP,
183 NFSV2PROC_NOOP,
184 NFSV2PROC_NOOP,
185 NFSV2PROC_NOOP,
186 NFSV2PROC_NOOP,
187};
188
189#ifndef NFS_NOSERVER
190/*
191 * Maps errno values to nfs error numbers.
192 * Use NFSERR_IO as the catch all for ones not specifically defined in
193 * RFC 1094.
194 */
195static u_char nfsrv_v2errmap[ELAST] = {
196 NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO,
197 NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
198 NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO,
199 NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR,
200 NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
201 NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS,
202 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
203 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
204 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
205 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
206 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
207 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
208 NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO,
209 NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE,
210 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
211 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
212 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
213 NFSERR_IO /* << Last is 86 */
214};
215
216/*
217 * Maps errno values to nfs error numbers.
218 * Although it is not obvious whether or not NFS clients really care if
219 * a returned error value is in the specified list for the procedure, the
220 * safest thing to do is filter them appropriately. For Version 2, the
221 * X/Open XNFS document is the only specification that defines error values
222 * for each RPC (The RFC simply lists all possible error values for all RPCs),
223 * so I have decided to not do this for Version 2.
224 * The first entry is the default error return and the rest are the valid
225 * errors for that RPC in increasing numeric order.
226 */
227static short nfsv3err_null[] = {
228 0,
229 0,
230};
231
232static short nfsv3err_getattr[] = {
233 NFSERR_IO,
234 NFSERR_IO,
235 NFSERR_STALE,
236 NFSERR_BADHANDLE,
237 NFSERR_SERVERFAULT,
238 0,
239};
240
241static short nfsv3err_setattr[] = {
242 NFSERR_IO,
243 NFSERR_PERM,
244 NFSERR_IO,
245 NFSERR_ACCES,
246 NFSERR_INVAL,
247 NFSERR_NOSPC,
248 NFSERR_ROFS,
249 NFSERR_DQUOT,
250 NFSERR_STALE,
251 NFSERR_BADHANDLE,
252 NFSERR_NOT_SYNC,
253 NFSERR_SERVERFAULT,
254 0,
255};
256
257static short nfsv3err_lookup[] = {
258 NFSERR_IO,
259 NFSERR_NOENT,
260 NFSERR_IO,
261 NFSERR_ACCES,
262 NFSERR_NOTDIR,
263 NFSERR_NAMETOL,
264 NFSERR_STALE,
265 NFSERR_BADHANDLE,
266 NFSERR_SERVERFAULT,
267 0,
268};
269
270static short nfsv3err_access[] = {
271 NFSERR_IO,
272 NFSERR_IO,
273 NFSERR_STALE,
274 NFSERR_BADHANDLE,
275 NFSERR_SERVERFAULT,
276 0,
277};
278
279static short nfsv3err_readlink[] = {
280 NFSERR_IO,
281 NFSERR_IO,
282 NFSERR_ACCES,
283 NFSERR_INVAL,
284 NFSERR_STALE,
285 NFSERR_BADHANDLE,
286 NFSERR_NOTSUPP,
287 NFSERR_SERVERFAULT,
288 0,
289};
290
291static short nfsv3err_read[] = {
292 NFSERR_IO,
293 NFSERR_IO,
294 NFSERR_NXIO,
295 NFSERR_ACCES,
296 NFSERR_INVAL,
297 NFSERR_STALE,
298 NFSERR_BADHANDLE,
299 NFSERR_SERVERFAULT,
300 0,
301};
302
303static short nfsv3err_write[] = {
304 NFSERR_IO,
305 NFSERR_IO,
306 NFSERR_ACCES,
307 NFSERR_INVAL,
308 NFSERR_FBIG,
309 NFSERR_NOSPC,
310 NFSERR_ROFS,
311 NFSERR_DQUOT,
312 NFSERR_STALE,
313 NFSERR_BADHANDLE,
314 NFSERR_SERVERFAULT,
315 0,
316};
317
318static short nfsv3err_create[] = {
319 NFSERR_IO,
320 NFSERR_IO,
321 NFSERR_ACCES,
322 NFSERR_EXIST,
323 NFSERR_NOTDIR,
324 NFSERR_NOSPC,
325 NFSERR_ROFS,
326 NFSERR_NAMETOL,
327 NFSERR_DQUOT,
328 NFSERR_STALE,
329 NFSERR_BADHANDLE,
330 NFSERR_NOTSUPP,
331 NFSERR_SERVERFAULT,
332 0,
333};
334
335static short nfsv3err_mkdir[] = {
336 NFSERR_IO,
337 NFSERR_IO,
338 NFSERR_ACCES,
339 NFSERR_EXIST,
340 NFSERR_NOTDIR,
341 NFSERR_NOSPC,
342 NFSERR_ROFS,
343 NFSERR_NAMETOL,
344 NFSERR_DQUOT,
345 NFSERR_STALE,
346 NFSERR_BADHANDLE,
347 NFSERR_NOTSUPP,
348 NFSERR_SERVERFAULT,
349 0,
350};
351
352static short nfsv3err_symlink[] = {
353 NFSERR_IO,
354 NFSERR_IO,
355 NFSERR_ACCES,
356 NFSERR_EXIST,
357 NFSERR_NOTDIR,
358 NFSERR_NOSPC,
359 NFSERR_ROFS,
360 NFSERR_NAMETOL,
361 NFSERR_DQUOT,
362 NFSERR_STALE,
363 NFSERR_BADHANDLE,
364 NFSERR_NOTSUPP,
365 NFSERR_SERVERFAULT,
366 0,
367};
368
369static short nfsv3err_mknod[] = {
370 NFSERR_IO,
371 NFSERR_IO,
372 NFSERR_ACCES,
373 NFSERR_EXIST,
374 NFSERR_NOTDIR,
375 NFSERR_NOSPC,
376 NFSERR_ROFS,
377 NFSERR_NAMETOL,
378 NFSERR_DQUOT,
379 NFSERR_STALE,
380 NFSERR_BADHANDLE,
381 NFSERR_NOTSUPP,
382 NFSERR_SERVERFAULT,
383 NFSERR_BADTYPE,
384 0,
385};
386
387static short nfsv3err_remove[] = {
388 NFSERR_IO,
389 NFSERR_NOENT,
390 NFSERR_IO,
391 NFSERR_ACCES,
392 NFSERR_NOTDIR,
393 NFSERR_ROFS,
394 NFSERR_NAMETOL,
395 NFSERR_STALE,
396 NFSERR_BADHANDLE,
397 NFSERR_SERVERFAULT,
398 0,
399};
400
401static short nfsv3err_rmdir[] = {
402 NFSERR_IO,
403 NFSERR_NOENT,
404 NFSERR_IO,
405 NFSERR_ACCES,
406 NFSERR_EXIST,
407 NFSERR_NOTDIR,
408 NFSERR_INVAL,
409 NFSERR_ROFS,
410 NFSERR_NAMETOL,
411 NFSERR_NOTEMPTY,
412 NFSERR_STALE,
413 NFSERR_BADHANDLE,
414 NFSERR_NOTSUPP,
415 NFSERR_SERVERFAULT,
416 0,
417};
418
419static short nfsv3err_rename[] = {
420 NFSERR_IO,
421 NFSERR_NOENT,
422 NFSERR_IO,
423 NFSERR_ACCES,
424 NFSERR_EXIST,
425 NFSERR_XDEV,
426 NFSERR_NOTDIR,
427 NFSERR_ISDIR,
428 NFSERR_INVAL,
429 NFSERR_NOSPC,
430 NFSERR_ROFS,
431 NFSERR_MLINK,
432 NFSERR_NAMETOL,
433 NFSERR_NOTEMPTY,
434 NFSERR_DQUOT,
435 NFSERR_STALE,
436 NFSERR_BADHANDLE,
437 NFSERR_NOTSUPP,
438 NFSERR_SERVERFAULT,
439 0,
440};
441
442static short nfsv3err_link[] = {
443 NFSERR_IO,
444 NFSERR_IO,
445 NFSERR_ACCES,
446 NFSERR_EXIST,
447 NFSERR_XDEV,
448 NFSERR_NOTDIR,
449 NFSERR_INVAL,
450 NFSERR_NOSPC,
451 NFSERR_ROFS,
452 NFSERR_MLINK,
453 NFSERR_NAMETOL,
454 NFSERR_DQUOT,
455 NFSERR_STALE,
456 NFSERR_BADHANDLE,
457 NFSERR_NOTSUPP,
458 NFSERR_SERVERFAULT,
459 0,
460};
461
462static short nfsv3err_readdir[] = {
463 NFSERR_IO,
464 NFSERR_IO,
465 NFSERR_ACCES,
466 NFSERR_NOTDIR,
467 NFSERR_STALE,
468 NFSERR_BADHANDLE,
469 NFSERR_BAD_COOKIE,
470 NFSERR_TOOSMALL,
471 NFSERR_SERVERFAULT,
472 0,
473};
474
475static short nfsv3err_readdirplus[] = {
476 NFSERR_IO,
477 NFSERR_IO,
478 NFSERR_ACCES,
479 NFSERR_NOTDIR,
480 NFSERR_STALE,
481 NFSERR_BADHANDLE,
482 NFSERR_BAD_COOKIE,
483 NFSERR_NOTSUPP,
484 NFSERR_TOOSMALL,
485 NFSERR_SERVERFAULT,
486 0,
487};
488
489static short nfsv3err_fsstat[] = {
490 NFSERR_IO,
491 NFSERR_IO,
492 NFSERR_STALE,
493 NFSERR_BADHANDLE,
494 NFSERR_SERVERFAULT,
495 0,
496};
497
498static short nfsv3err_fsinfo[] = {
499 NFSERR_STALE,
500 NFSERR_STALE,
501 NFSERR_BADHANDLE,
502 NFSERR_SERVERFAULT,
503 0,
504};
505
506static short nfsv3err_pathconf[] = {
507 NFSERR_STALE,
508 NFSERR_STALE,
509 NFSERR_BADHANDLE,
510 NFSERR_SERVERFAULT,
511 0,
512};
513
514static short nfsv3err_commit[] = {
515 NFSERR_IO,
516 NFSERR_IO,
517 NFSERR_STALE,
518 NFSERR_BADHANDLE,
519 NFSERR_SERVERFAULT,
520 0,
521};
522
523static short *nfsrv_v3errmap[] = {
524 nfsv3err_null,
525 nfsv3err_getattr,
526 nfsv3err_setattr,
527 nfsv3err_lookup,
528 nfsv3err_access,
529 nfsv3err_readlink,
530 nfsv3err_read,
531 nfsv3err_write,
532 nfsv3err_create,
533 nfsv3err_mkdir,
534 nfsv3err_symlink,
535 nfsv3err_mknod,
536 nfsv3err_remove,
537 nfsv3err_rmdir,
538 nfsv3err_rename,
539 nfsv3err_link,
540 nfsv3err_readdir,
541 nfsv3err_readdirplus,
542 nfsv3err_fsstat,
543 nfsv3err_fsinfo,
544 nfsv3err_pathconf,
545 nfsv3err_commit,
546};
547
548#endif /* NFS_NOSERVER */
549
984263bc 550struct nfssvc_args;
753fd850 551extern int sys_nfssvc(struct proc *, struct nfssvc_args *, int *);
984263bc 552
2cb16efc
MD
553/*
554 * This needs to return a monotonically increasing or close to monotonically
555 * increasing result, otherwise the write gathering queues won't work
556 * properly.
557 */
984263bc 558u_quad_t
e851b29e 559nfs_curusec(void)
984263bc
MD
560{
561 struct timeval tv;
562
2cb16efc 563 getmicrouptime(&tv);
984263bc
MD
564 return ((u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec);
565}
566
567/*
984263bc
MD
568 * Called once to initialize data structures...
569 */
570int
e851b29e 571nfs_init(struct vfsconf *vfsp)
984263bc 572{
f786cc86 573 callout_init(&nfs_timer_handle);
984263bc
MD
574 nfsmount_zone = zinit("NFSMOUNT", sizeof(struct nfsmount), 0, 0, 1);
575
576 nfs_mount_type = vfsp->vfc_typenum;
577 nfsrtt.pos = 0;
578 rpc_vers = txdr_unsigned(RPC_VER2);
579 rpc_call = txdr_unsigned(RPC_CALL);
580 rpc_reply = txdr_unsigned(RPC_REPLY);
581 rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
582 rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
583 rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
584 rpc_autherr = txdr_unsigned(RPC_AUTHERR);
585 rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
586 rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
587 nfs_prog = txdr_unsigned(NFS_PROG);
984263bc
MD
588 nfs_true = txdr_unsigned(TRUE);
589 nfs_false = txdr_unsigned(FALSE);
590 nfs_xdrneg1 = txdr_unsigned(-1);
591 nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
592 if (nfs_ticks < 1)
593 nfs_ticks = 1;
984263bc
MD
594 nfs_nhinit(); /* Init the nfsnode table */
595#ifndef NFS_NOSERVER
596 nfsrv_init(0); /* Init server data structures */
597 nfsrv_initcache(); /* Init the server request cache */
598#endif
599
600 /*
b9a7a2bd
MD
601 * Mainly for vkernel operation. If memory is severely limited
602 */
603 if (nfs_maxasyncbio > nmbclusters * MCLBYTES / NFS_MAXDATA / 3)
604 nfs_maxasyncbio = nmbclusters * MCLBYTES / NFS_MAXDATA / 3;
605 if (nfs_maxasyncbio < 4)
606 nfs_maxasyncbio = 4;
607
608 /*
984263bc
MD
609 * Initialize reply list and start timer
610 */
c6b43e93 611 nfs_timer_callout(0);
984263bc 612
984263bc
MD
613 nfs_prev_nfssvc_sy_narg = sysent[SYS_nfssvc].sy_narg;
614 sysent[SYS_nfssvc].sy_narg = 2;
615 nfs_prev_nfssvc_sy_call = sysent[SYS_nfssvc].sy_call;
753fd850 616 sysent[SYS_nfssvc].sy_call = (sy_call_t *)sys_nfssvc;
984263bc
MD
617
618 nfs_pbuf_freecnt = nswbuf / 2 + 1;
619
620 return (0);
621}
622
623int
e851b29e 624nfs_uninit(struct vfsconf *vfsp)
984263bc 625{
f786cc86 626 callout_stop(&nfs_timer_handle);
984263bc 627 nfs_mount_type = -1;
984263bc
MD
628 sysent[SYS_nfssvc].sy_narg = nfs_prev_nfssvc_sy_narg;
629 sysent[SYS_nfssvc].sy_call = nfs_prev_nfssvc_sy_call;
630 return (0);
631}
632
633/*
634 * Attribute cache routines.
635 * nfs_loadattrcache() - loads or updates the cache contents from attributes
636 * that are on the mbuf list
637 * nfs_getattrcache() - returns valid attributes if found in cache, returns
638 * error otherwise
639 */
640
641/*
642 * Load the attribute cache (that lives in the nfsnode entry) with
5a9187cb
MD
643 * the values on the mbuf list. Load *vaper with the attributes. vaper
644 * may be NULL.
645 *
646 * As a side effect n_mtime, which we use to determine if the file was
647 * modified by some other host, is set to the attribute timestamp and
648 * NRMODIFIED is set if the two values differ.
649 *
650 * WARNING: the mtime loaded into vaper does not necessarily represent
651 * n_mtime or n_attr.mtime due to NACC and NUPD.
984263bc
MD
652 */
653int
42edf14f 654nfs_loadattrcache(struct vnode *vp, struct mbuf **mdp, caddr_t *dposp,
5a9187cb 655 struct vattr *vaper, int lattr_flags)
984263bc 656{
40393ded
RG
657 struct vattr *vap;
658 struct nfs_fattr *fp;
659 struct nfsnode *np;
660 int32_t t1;
984263bc 661 caddr_t cp2;
e4c9c0c8 662 int error = 0;
0e9b9130 663 int rmajor, rminor;
e4c9c0c8 664 udev_t rdev;
984263bc
MD
665 struct mbuf *md;
666 enum vtype vtyp;
667 u_short vmode;
668 struct timespec mtime;
669 int v3 = NFS_ISV3(vp);
670
671 md = *mdp;
672 t1 = (mtod(md, caddr_t) + md->m_len) - *dposp;
673 if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) != 0)
674 return (error);
675 fp = (struct nfs_fattr *)cp2;
676 if (v3) {
677 vtyp = nfsv3tov_type(fp->fa_type);
678 vmode = fxdr_unsigned(u_short, fp->fa_mode);
0e9b9130
MD
679 rmajor = (int)fxdr_unsigned(int, fp->fa3_rdev.specdata1);
680 rminor = (int)fxdr_unsigned(int, fp->fa3_rdev.specdata2);
984263bc
MD
681 fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
682 } else {
683 vtyp = nfsv2tov_type(fp->fa_type);
684 vmode = fxdr_unsigned(u_short, fp->fa_mode);
685 /*
686 * XXX
687 *
688 * The duplicate information returned in fa_type and fa_mode
689 * is an ambiguity in the NFS version 2 protocol.
690 *
691 * VREG should be taken literally as a regular file. If a
692 * server intents to return some type information differently
693 * in the upper bits of the mode field (e.g. for sockets, or
694 * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we
695 * leave the examination of the mode bits even in the VREG
696 * case to avoid breakage for bogus servers, but we make sure
697 * that there are actually type bits set in the upper part of
698 * fa_mode (and failing that, trust the va_type field).
699 *
700 * NFSv3 cleared the issue, and requires fa_mode to not
701 * contain any type information (while also introduing sockets
702 * and FIFOs for fa_type).
703 */
704 if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0))
705 vtyp = IFTOVT(vmode);
706 rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
0e9b9130
MD
707 rmajor = umajor(rdev);
708 rminor = uminor(rdev);
984263bc
MD
709 fxdr_nfsv2time(&fp->fa2_mtime, &mtime);
710
711 /*
712 * Really ugly NFSv2 kludge.
713 */
e4c9c0c8 714 if (vtyp == VCHR && rdev == (udev_t)0xffffffff)
984263bc
MD
715 vtyp = VFIFO;
716 }
717
718 /*
719 * If v_type == VNON it is a new node, so fill in the v_type,
720 * n_mtime fields. Check to see if it represents a special
721 * device, and if so, check for a possible alias. Once the
722 * correct vnode has been obtained, fill in the rest of the
723 * information.
724 */
725 np = VTONFS(vp);
726 if (vp->v_type != vtyp) {
1c843a13 727 nfs_setvtype(vp, vtyp);
984263bc 728 if (vp->v_type == VFIFO) {
6ddb7618 729 vp->v_ops = &vp->v_mount->mnt_vn_fifo_ops;
0961aa92 730 } else if (vp->v_type == VCHR || vp->v_type == VBLK) {
6ddb7618 731 vp->v_ops = &vp->v_mount->mnt_vn_spec_ops;
0e9b9130 732 addaliasu(vp, rmajor, rminor);
0961aa92 733 } else {
6ddb7618 734 vp->v_ops = &vp->v_mount->mnt_vn_use_ops;
984263bc
MD
735 }
736 np->n_mtime = mtime.tv_sec;
5a9187cb 737 } else if (np->n_mtime != mtime.tv_sec) {
9793c819 738 /*
5a9187cb
MD
739 * If we haven't modified the file locally and the server
740 * timestamp does not match, then the server probably
741 * modified the file. We must flag this condition so
742 * the proper syncnronization can be done. We do not
743 * try to synchronize the state here because that
744 * could lead to an endless recursion.
745 *
746 * XXX loadattrcache can be set during the reply to a write,
747 * before the write timestamp is properly processed. To
748 * avoid unconditionally setting the rmodified bit (which
749 * has the effect of flushing the cache), we only do this
750 * check if the lmodified bit is not set.
9793c819
MD
751 */
752 np->n_mtime = mtime.tv_sec;
5a9187cb
MD
753 if ((lattr_flags & NFS_LATTR_NOMTIMECHECK) == 0)
754 np->n_flag |= NRMODIFIED;
984263bc
MD
755 }
756 vap = &np->n_vattr;
757 vap->va_type = vtyp;
758 vap->va_mode = (vmode & 07777);
0e9b9130
MD
759 vap->va_rmajor = rmajor;
760 vap->va_rminor = rminor;
984263bc
MD
761 vap->va_mtime = mtime;
762 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
763 if (v3) {
764 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
765 vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
766 vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
767 vap->va_size = fxdr_hyper(&fp->fa3_size);
768 vap->va_blocksize = NFS_FABLKSIZE;
769 vap->va_bytes = fxdr_hyper(&fp->fa3_used);
50626622 770 vap->va_fileid = fxdr_hyper(&fp->fa3_fileid);
984263bc
MD
771 fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
772 fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime);
773 vap->va_flags = 0;
774 vap->va_filerev = 0;
775 } else {
776 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
777 vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
778 vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
779 vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
780 vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize);
781 vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks)
782 * NFS_FABLKSIZE;
783 vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
784 fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
785 vap->va_flags = 0;
786 vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t,
787 fp->fa2_ctime.nfsv2_sec);
788 vap->va_ctime.tv_nsec = 0;
789 vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec);
790 vap->va_filerev = 0;
791 }
792 np->n_attrstamp = time_second;
793 if (vap->va_size != np->n_size) {
794 if (vap->va_type == VREG) {
8452310f
MD
795 /*
796 * Get rid of all the junk we had before and just
797 * set NRMODIFIED if NLMODIFIED is 0. Depend on
798 * occassionally flushing our dirty buffers to
799 * clear both the NLMODIFIED and NRMODIFIED flags.
800 */
801 if ((np->n_flag & NLMODIFIED) == 0)
802 np->n_flag |= NRMODIFIED;
803#if 0
5a9187cb
MD
804 if ((lattr_flags & NFS_LATTR_NOSHRINK) &&
805 vap->va_size < np->n_size) {
984263bc
MD
806 /*
807 * We've been told not to shrink the file;
808 * zero np->n_attrstamp to indicate that
809 * the attributes are stale.
5a9187cb
MD
810 *
811 * This occurs primarily due to recursive
812 * NFS ops that are executed during periods
813 * where we cannot safely reduce the size of
814 * the file.
815 *
816 * Additionally, write rpcs are broken down
817 * into buffers and np->n_size is
818 * pre-extended. Setting NRMODIFIED here
819 * can result in n_size getting reset to a
820 * lower value, which is NOT what we want.
821 * XXX this needs to be cleaned up a lot
822 * more.
984263bc
MD
823 */
824 vap->va_size = np->n_size;
825 np->n_attrstamp = 0;
5a9187cb
MD
826 if ((np->n_flag & NLMODIFIED) == 0)
827 np->n_flag |= NRMODIFIED;
828 } else if (np->n_flag & NLMODIFIED) {
79e5012e
MD
829 /*
830 * We've modified the file: Use the larger
5a9187cb
MD
831 * of our size, and the server's size. At
832 * this point the cache coherency is all
833 * shot to hell. To try to handle multiple
834 * clients appending to the file at the same
835 * time mark that the server has changed
836 * the file if the server's notion of the
837 * file size is larger then our notion.
838 *
839 * XXX this needs work.
79e5012e
MD
840 */
841 if (vap->va_size < np->n_size) {
984263bc 842 vap->va_size = np->n_size;
79e5012e 843 } else {
984263bc 844 np->n_size = vap->va_size;
5a9187cb 845 np->n_flag |= NRMODIFIED;
79e5012e 846 }
984263bc 847 } else {
5a9187cb
MD
848 /*
849 * Someone changed the file's size on the
850 * server and there are no local changes
851 * to get in the way, set the size and mark
852 * it.
853 */
984263bc 854 np->n_size = vap->va_size;
5a9187cb 855 np->n_flag |= NRMODIFIED;
984263bc 856 }
3bb7eedb 857 nvnode_pager_setsize(vp, np->n_size, XXX);
8452310f 858#endif
984263bc
MD
859 } else {
860 np->n_size = vap->va_size;
861 }
862 }
863 if (vaper != NULL) {
864 bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
865 if (np->n_flag & NCHG) {
866 if (np->n_flag & NACC)
867 vaper->va_atime = np->n_atim;
868 if (np->n_flag & NUPD)
869 vaper->va_mtime = np->n_mtim;
870 }
871 }
872 return (0);
873}
874
875#ifdef NFS_ACDEBUG
876#include <sys/sysctl.h>
877SYSCTL_DECL(_vfs_nfs);
878static int nfs_acdebug;
879SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, "");
880#endif
881
882/*
883 * Check the time stamp
884 * If the cache is valid, copy contents to *vap and return 0
885 * otherwise return an error
886 */
887int
e851b29e 888nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
984263bc 889{
40393ded
RG
890 struct nfsnode *np;
891 struct vattr *vap;
984263bc
MD
892 struct nfsmount *nmp;
893 int timeo;
894
895 np = VTONFS(vp);
896 vap = &np->n_vattr;
897 nmp = VFSTONFS(vp->v_mount);
fad57d0e
MD
898
899 /*
900 * Dynamic timeout based on how recently the file was modified.
5a9187cb 901 * n_mtime is always valid.
fad57d0e 902 */
97100839 903 timeo = (get_approximate_time_t() - np->n_mtime) / 60;
984263bc
MD
904
905#ifdef NFS_ACDEBUG
906 if (nfs_acdebug>1)
086c1d7e 907 kprintf("nfs_getattrcache: initial timeo = %d\n", timeo);
984263bc
MD
908#endif
909
910 if (vap->va_type == VDIR) {
5a9187cb 911 if ((np->n_flag & NLMODIFIED) || timeo < nmp->nm_acdirmin)
984263bc
MD
912 timeo = nmp->nm_acdirmin;
913 else if (timeo > nmp->nm_acdirmax)
914 timeo = nmp->nm_acdirmax;
915 } else {
5a9187cb 916 if ((np->n_flag & NLMODIFIED) || timeo < nmp->nm_acregmin)
984263bc
MD
917 timeo = nmp->nm_acregmin;
918 else if (timeo > nmp->nm_acregmax)
919 timeo = nmp->nm_acregmax;
920 }
921
922#ifdef NFS_ACDEBUG
923 if (nfs_acdebug > 2)
086c1d7e 924 kprintf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
984263bc
MD
925 nmp->nm_acregmin, nmp->nm_acregmax,
926 nmp->nm_acdirmin, nmp->nm_acdirmax);
927
928 if (nfs_acdebug)
086c1d7e 929 kprintf("nfs_getattrcache: age = %d; final timeo = %d\n",
9793c819 930 (int)(time_second - np->n_attrstamp), timeo);
984263bc
MD
931#endif
932
9793c819 933 if (np->n_attrstamp == 0 || (time_second - np->n_attrstamp) >= timeo) {
984263bc
MD
934 nfsstats.attrcache_misses++;
935 return (ENOENT);
936 }
937 nfsstats.attrcache_hits++;
5a9187cb
MD
938
939 /*
940 * Our attribute cache can be stale due to modifications made on
941 * this host. XXX this is a bad hack. We need a more deterministic
942 * means of finding out which np fields are valid verses attr cache
943 * fields. We really should update the vattr info on the fly when
944 * making local changes.
945 */
984263bc
MD
946 if (vap->va_size != np->n_size) {
947 if (vap->va_type == VREG) {
8452310f
MD
948 if (np->n_flag & NLMODIFIED)
949 vap->va_size = np->n_size;
950 nfs_meta_setsize(vp, curthread, vap->va_size, 0);
984263bc
MD
951 } else {
952 np->n_size = vap->va_size;
953 }
954 }
955 bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
956 if (np->n_flag & NCHG) {
957 if (np->n_flag & NACC)
958 vaper->va_atime = np->n_atim;
959 if (np->n_flag & NUPD)
960 vaper->va_mtime = np->n_mtim;
961 }
962 return (0);
963}
964
965#ifndef NFS_NOSERVER
fad57d0e 966
984263bc
MD
967/*
968 * Set up nameidata for a lookup() call and do it.
969 *
970 * If pubflag is set, this call is done for a lookup operation on the
971 * public filehandle. In that case we allow crossing mountpoints and
972 * absolute pathnames. However, the caller is expected to check that
973 * the lookup result is within the public fs, and deny access if
974 * it is not.
975 *
984263bc
MD
976 * dirp may be set whether an error is returned or not, and must be
977 * released by the caller.
fad57d0e 978 *
28623bf9 979 * On return nd->nl_nch usually points to the target ncp, which may represent
fad57d0e
MD
980 * a negative hit.
981 *
982 * NOTE: the caller must call nlookup_done(nd) unconditionally on return
983 * to cleanup.
984263bc
MD
984 */
985int
3a907475 986nfs_namei(struct nlookupdata *nd, struct ucred *cred, int nflags,
fad57d0e
MD
987 struct vnode **dvpp, struct vnode **vpp,
988 fhandle_t *fhp, int len,
989 struct nfssvc_sock *slp, struct sockaddr *nam, struct mbuf **mdp,
990 caddr_t *dposp, struct vnode **dirpp, struct thread *td,
991 int kerbflag, int pubflag)
984263bc 992{
40393ded
RG
993 int i, rem;
994 struct mbuf *md;
995 char *fromcp, *tocp, *cp;
fad57d0e 996 char *namebuf;
28623bf9 997 struct nchandle nch;
984263bc 998 struct vnode *dp;
67863d04 999 struct mount *mp;
fad57d0e 1000 int error, rdonly;
984263bc 1001
70aac194 1002 namebuf = objcache_get(namei_oc, M_WAITOK);
fad57d0e 1003 *dirpp = NULL;
984263bc
MD
1004
1005 /*
fad57d0e 1006 * Copy the name from the mbuf list to namebuf.
984263bc
MD
1007 */
1008 fromcp = *dposp;
fad57d0e 1009 tocp = namebuf;
984263bc
MD
1010 md = *mdp;
1011 rem = mtod(md, caddr_t) + md->m_len - fromcp;
1012 for (i = 0; i < len; i++) {
1013 while (rem == 0) {
1014 md = md->m_next;
1015 if (md == NULL) {
1016 error = EBADRPC;
1017 goto out;
1018 }
1019 fromcp = mtod(md, caddr_t);
1020 rem = md->m_len;
1021 }
1022 if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) {
1023 error = EACCES;
1024 goto out;
1025 }
1026 *tocp++ = *fromcp++;
1027 rem--;
1028 }
1029 *tocp = '\0';
1030 *mdp = md;
1031 *dposp = fromcp;
1032 len = nfsm_rndup(len)-len;
1033 if (len > 0) {
1034 if (rem >= len)
1035 *dposp += len;
1036 else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0)
1037 goto out;
1038 }
1039
1040 /*
fad57d0e
MD
1041 * Extract and set starting directory. The returned dp is refd
1042 * but not locked.
984263bc 1043 */
67863d04 1044 error = nfsrv_fhtovp(fhp, FALSE, &mp, &dp, cred, slp,
fad57d0e 1045 nam, &rdonly, kerbflag, pubflag);
984263bc
MD
1046 if (error)
1047 goto out;
1048 if (dp->v_type != VDIR) {
1049 vrele(dp);
1050 error = ENOTDIR;
1051 goto out;
1052 }
1053
984263bc
MD
1054 /*
1055 * Set return directory. Reference to dp is implicitly transfered
fad57d0e
MD
1056 * to the returned pointer. This must be set before we potentially
1057 * goto out below.
984263bc 1058 */
fad57d0e 1059 *dirpp = dp;
984263bc 1060
d64fd6d3
MD
1061 /*
1062 * read-only - NLC_DELETE, NLC_RENAME_DST are disallowed. NLC_CREATE
1063 * is passed through to nlookup() and will be disallowed
1064 * if the file does not already exist.
1065 */
1066 if (rdonly) {
1067 nflags |= NLC_NFS_RDONLY;
1068 if (nflags & (NLC_DELETE | NLC_RENAME_DST)) {
1069 error = EROFS;
1070 goto out;
1071 }
1072 }
1073
1074 /*
1075 * Oh joy. For WebNFS, handle those pesky '%' escapes,
1076 * and the 'native path' indicator.
1077 */
984263bc 1078 if (pubflag) {
70aac194 1079 cp = objcache_get(namei_oc, M_WAITOK);
fad57d0e 1080 fromcp = namebuf;
984263bc
MD
1081 tocp = cp;
1082 if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) {
1083 switch ((unsigned char)*fromcp) {
1084 case WEBNFS_NATIVE_CHAR:
1085 /*
1086 * 'Native' path for us is the same
1087 * as a path according to the NFS spec,
1088 * just skip the escape char.
1089 */
1090 fromcp++;
1091 break;
1092 /*
1093 * More may be added in the future, range 0x80-0xff
1094 */
1095 default:
1096 error = EIO;
70aac194 1097 objcache_put(namei_oc, cp);
984263bc
MD
1098 goto out;
1099 }
1100 }
1101 /*
1102 * Translate the '%' escapes, URL-style.
1103 */
1104 while (*fromcp != '\0') {
1105 if (*fromcp == WEBNFS_ESC_CHAR) {
1106 if (fromcp[1] != '\0' && fromcp[2] != '\0') {
1107 fromcp++;
1108 *tocp++ = HEXSTRTOI(fromcp);
1109 fromcp += 2;
1110 continue;
1111 } else {
1112 error = ENOENT;
70aac194 1113 objcache_put(namei_oc, cp);
984263bc
MD
1114 goto out;
1115 }
1116 } else
1117 *tocp++ = *fromcp++;
1118 }
1119 *tocp = '\0';
70aac194 1120 objcache_put(namei_oc, namebuf);
fad57d0e 1121 namebuf = cp;
984263bc
MD
1122 }
1123
fad57d0e
MD
1124 /*
1125 * Setup for search. We need to get a start directory from dp. Note
1126 * that dp is ref'd, but we no longer 'own' the ref (*dirpp owns it).
1127 */
1128 if (pubflag == 0) {
3a907475
MD
1129 nflags |= NLC_NFS_NOSOFTLINKTRAV;
1130 nflags |= NLC_NOCROSSMOUNT;
984263bc
MD
1131 }
1132
1133 /*
fad57d0e
MD
1134 * We need a starting ncp from the directory vnode dp. dp must not
1135 * be locked. The returned ncp will be refd but not locked.
1136 *
1137 * If no suitable ncp is found we instruct cache_fromdvp() to create
1138 * one. If this fails the directory has probably been removed while
1139 * the target was chdir'd into it and any further lookup will fail.
984263bc 1140 */
28623bf9 1141 if ((error = cache_fromdvp(dp, cred, 1, &nch)) != 0)
fad57d0e 1142 goto out;
3a907475 1143 nlookup_init_raw(nd, namebuf, UIO_SYSSPACE, nflags, cred, &nch);
28623bf9 1144 cache_drop(&nch);
984263bc 1145
fad57d0e
MD
1146 /*
1147 * Ok, do the lookup.
1148 */
1149 error = nlookup(nd);
984263bc 1150
fad57d0e
MD
1151 /*
1152 * If no error occurred return the requested dvpp and vpp. If
28623bf9 1153 * NLC_CREATE was specified nd->nl_nch may represent a negative
fad57d0e
MD
1154 * cache hit in which case we do not attempt to obtain the vp.
1155 */
1156 if (error == 0) {
fad57d0e 1157 if (dvpp) {
443472eb 1158 if (nd->nl_nch.ncp->nc_parent) {
28623bf9
MD
1159 nch = nd->nl_nch;
1160 nch.ncp = nch.ncp->nc_parent;
2247fe02
MD
1161 cache_hold(&nch);
1162 cache_lock(&nch);
28623bf9
MD
1163 error = cache_vget(&nch, nd->nl_cred,
1164 LK_EXCLUSIVE, dvpp);
2247fe02 1165 cache_put(&nch);
fad57d0e
MD
1166 } else {
1167 error = ENXIO;
1168 }
984263bc 1169 }
28623bf9
MD
1170 if (vpp && nd->nl_nch.ncp->nc_vp) {
1171 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, vpp);
984263bc 1172 }
984263bc 1173 if (error) {
fad57d0e
MD
1174 if (dvpp && *dvpp) {
1175 vput(*dvpp);
1176 *dvpp = NULL;
1177 }
1178 if (vpp && *vpp) {
1179 vput(*vpp);
1180 *vpp = NULL;
1181 }
984263bc 1182 }
984263bc
MD
1183 }
1184
1185 /*
fad57d0e 1186 * Finish up.
984263bc
MD
1187 */
1188out:
70aac194 1189 objcache_put(namei_oc, namebuf);
984263bc
MD
1190 return (error);
1191}
1192
1193/*
984263bc
MD
1194 * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
1195 * - look up fsid in mount list (if not found ret error)
1196 * - get vp and export rights by calling VFS_FHTOVP()
1197 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
a11aaa81 1198 * - if not lockflag unlock it with vn_unlock()
984263bc
MD
1199 */
1200int
67863d04
MD
1201nfsrv_fhtovp(fhandle_t *fhp, int lockflag,
1202 struct mount **mpp, struct vnode **vpp,
e851b29e
CP
1203 struct ucred *cred, struct nfssvc_sock *slp, struct sockaddr *nam,
1204 int *rdonlyp, int kerbflag, int pubflag)
984263bc 1205{
40393ded
RG
1206 struct mount *mp;
1207 int i;
984263bc
MD
1208 struct ucred *credanon;
1209 int error, exflags;
1210#ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */
1211 struct sockaddr_int *saddr;
1212#endif
1213
67863d04
MD
1214 *vpp = NULL;
1215 *mpp = NULL;
984263bc
MD
1216
1217 if (nfs_ispublicfh(fhp)) {
1218 if (!pubflag || !nfs_pub.np_valid)
1219 return (ESTALE);
1220 fhp = &nfs_pub.np_handle;
1221 }
1222
67863d04
MD
1223 mp = *mpp = vfs_getvfs(&fhp->fh_fsid);
1224 if (mp == NULL)
984263bc
MD
1225 return (ESTALE);
1226 error = VFS_CHECKEXP(mp, nam, &exflags, &credanon);
1227 if (error)
1228 return (error);
67863d04 1229 error = VFS_FHTOVP(mp, NULL, &fhp->fh_fid, vpp);
984263bc
MD
1230 if (error)
1231 return (error);
1232#ifdef MNT_EXNORESPORT
1233 if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) {
1234 saddr = (struct sockaddr_in *)nam;
1235 if (saddr->sin_family == AF_INET &&
1236 ntohs(saddr->sin_port) >= IPPORT_RESERVED) {
1237 vput(*vpp);
1238 *vpp = NULL;
1239 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
1240 }
1241 }
1242#endif
1243 /*
1244 * Check/setup credentials.
1245 */
1246 if (exflags & MNT_EXKERB) {
1247 if (!kerbflag) {
1248 vput(*vpp);
1249 *vpp = NULL;
1250 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
1251 }
1252 } else if (kerbflag) {
1253 vput(*vpp);
1254 *vpp = NULL;
1255 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
1256 } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
1257 cred->cr_uid = credanon->cr_uid;
1258 for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
1259 cred->cr_groups[i] = credanon->cr_groups[i];
1260 cred->cr_ngroups = i;
1261 }
1262 if (exflags & MNT_EXRDONLY)
1263 *rdonlyp = 1;
1264 else
1265 *rdonlyp = 0;
1266
984263bc 1267 if (!lockflag)
a11aaa81 1268 vn_unlock(*vpp);
984263bc
MD
1269 return (0);
1270}
1271
984263bc
MD
1272/*
1273 * WebNFS: check if a filehandle is a public filehandle. For v3, this
1274 * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has
1275 * transformed this to all zeroes in both cases, so check for it.
1276 */
1277int
e851b29e 1278nfs_ispublicfh(fhandle_t *fhp)
984263bc
MD
1279{
1280 char *cp = (char *)fhp;
1281 int i;
1282
1283 for (i = 0; i < NFSX_V3FH; i++)
1284 if (*cp++ != 0)
1285 return (FALSE);
1286 return (TRUE);
1287}
1288
1289#endif /* NFS_NOSERVER */
1290/*
1291 * This function compares two net addresses by family and returns TRUE
1292 * if they are the same host.
1293 * If there is any doubt, return FALSE.
1294 * The AF_INET family is handled as a special case so that address mbufs
1295 * don't need to be saved to store "struct in_addr", which is only 4 bytes.
1296 */
1297int
e851b29e 1298netaddr_match(int family, union nethostaddr *haddr, struct sockaddr *nam)
984263bc 1299{
40393ded 1300 struct sockaddr_in *inetaddr;
984263bc
MD
1301
1302 switch (family) {
1303 case AF_INET:
1304 inetaddr = (struct sockaddr_in *)nam;
1305 if (inetaddr->sin_family == AF_INET &&
1306 inetaddr->sin_addr.s_addr == haddr->had_inetaddr)
1307 return (1);
1308 break;
1309 default:
1310 break;
1311 };
1312 return (0);
1313}
1314
1315static nfsuint64 nfs_nullcookie = { { 0, 0 } };
1316/*
1317 * This function finds the directory cookie that corresponds to the
1318 * logical byte offset given.
1319 */
1320nfsuint64 *
e851b29e 1321nfs_getcookie(struct nfsnode *np, off_t off, int add)
984263bc 1322{
40393ded
RG
1323 struct nfsdmap *dp, *dp2;
1324 int pos;
984263bc
MD
1325
1326 pos = (uoff_t)off / NFS_DIRBLKSIZ;
1327 if (pos == 0 || off < 0) {
1328#ifdef DIAGNOSTIC
1329 if (add)
1330 panic("nfs getcookie add at <= 0");
1331#endif
1332 return (&nfs_nullcookie);
1333 }
1334 pos--;
1335 dp = np->n_cookies.lh_first;
1336 if (!dp) {
1337 if (add) {
1338 MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap),
1339 M_NFSDIROFF, M_WAITOK);
1340 dp->ndm_eocookie = 0;
1341 LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list);
1342 } else
60233e58 1343 return (NULL);
984263bc
MD
1344 }
1345 while (pos >= NFSNUMCOOKIES) {
1346 pos -= NFSNUMCOOKIES;
1347 if (dp->ndm_list.le_next) {
1348 if (!add && dp->ndm_eocookie < NFSNUMCOOKIES &&
1349 pos >= dp->ndm_eocookie)
60233e58 1350 return (NULL);
984263bc
MD
1351 dp = dp->ndm_list.le_next;
1352 } else if (add) {
1353 MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap),
1354 M_NFSDIROFF, M_WAITOK);
1355 dp2->ndm_eocookie = 0;
1356 LIST_INSERT_AFTER(dp, dp2, ndm_list);
1357 dp = dp2;
1358 } else
60233e58 1359 return (NULL);
984263bc
MD
1360 }
1361 if (pos >= dp->ndm_eocookie) {
1362 if (add)
1363 dp->ndm_eocookie = pos + 1;
1364 else
60233e58 1365 return (NULL);
984263bc
MD
1366 }
1367 return (&dp->ndm_cookies[pos]);
1368}
1369
1370/*
1371 * Invalidate cached directory information, except for the actual directory
1372 * blocks (which are invalidated separately).
1373 * Done mainly to avoid the use of stale offset cookies.
1374 */
1375void
e851b29e 1376nfs_invaldir(struct vnode *vp)
984263bc 1377{
40393ded 1378 struct nfsnode *np = VTONFS(vp);
984263bc
MD
1379
1380#ifdef DIAGNOSTIC
1381 if (vp->v_type != VDIR)
1382 panic("nfs: invaldir not dir");
1383#endif
1384 np->n_direofoffset = 0;
1385 np->n_cookieverf.nfsuquad[0] = 0;
1386 np->n_cookieverf.nfsuquad[1] = 0;
1387 if (np->n_cookies.lh_first)
1388 np->n_cookies.lh_first->ndm_eocookie = 0;
1389}
1390
1391/*
1c843a13
MD
1392 * Set the v_type field for an NFS client's vnode and initialize for
1393 * buffer cache operations if necessary.
1394 */
1395void
1396nfs_setvtype(struct vnode *vp, enum vtype vtyp)
1397{
1398 vp->v_type = vtyp;
1399
1400 switch(vtyp) {
1401 case VREG:
1402 case VDIR:
1403 case VLNK:
b0d18f7d
MD
1404 /*
1405 * Needs VMIO, size not yet known, and blocksize
1406 * is not really relevant if we are passing a
1407 * filesize of 0.
1408 */
1409 vinitvmio(vp, 0, PAGE_SIZE, -1);
1c843a13
MD
1410 break;
1411 default:
1412 break;
1413 }
1414}
1415
1416/*
984263bc
MD
1417 * The write verifier has changed (probably due to a server reboot), so all
1418 * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
1419 * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
1420 * and B_CLUSTEROK flags. Once done the new write verifier can be set for the
1421 * mount point.
1422 *
1423 * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data
1424 * writes are not clusterable.
1425 */
6bae6177
MD
1426
1427static int nfs_clearcommit_bp(struct buf *bp, void *data __unused);
0202303b
MD
1428static int nfs_clearcommit_callback(struct mount *mp, struct vnode *vp,
1429 void *data __unused);
6bae6177 1430
984263bc 1431void
e851b29e 1432nfs_clearcommit(struct mount *mp)
984263bc 1433{
0202303b
MD
1434 vmntvnodescan(mp, VMSC_NOWAIT, nfs_clearcommit_callback, NULL, NULL);
1435}
1436
1437static int
1438nfs_clearcommit_callback(struct mount *mp, struct vnode *vp,
1439 void *data __unused)
1440{
0202303b 1441 vhold(vp);
3b998fa9 1442 lwkt_gettoken(&vp->v_token);
0202303b
MD
1443 RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
1444 nfs_clearcommit_bp, NULL);
3b998fa9 1445 lwkt_reltoken(&vp->v_token);
0202303b
MD
1446 vdrop(vp);
1447 return(0);
984263bc
MD
1448}
1449
6bae6177
MD
1450static int
1451nfs_clearcommit_bp(struct buf *bp, void *data __unused)
1452{
1453 if (BUF_REFCNT(bp) == 0 &&
1454 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
1455 == (B_DELWRI | B_NEEDCOMMIT)) {
1456 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
1457 }
1458 return(0);
1459}
1460
984263bc
MD
1461#ifndef NFS_NOSERVER
1462/*
1463 * Map errnos to NFS error numbers. For Version 3 also filter out error
1464 * numbers not specified for the associated procedure.
1465 */
1466int
e851b29e 1467nfsrv_errmap(struct nfsrv_descript *nd, int err)
984263bc 1468{
40393ded 1469 short *defaulterrp, *errp;
984263bc
MD
1470
1471 if (nd->nd_flag & ND_NFSV3) {
1472 if (nd->nd_procnum <= NFSPROC_COMMIT) {
1473 errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum];
1474 while (*++errp) {
1475 if (*errp == err)
1476 return (err);
1477 else if (*errp > err)
1478 break;
1479 }
1480 return ((int)*defaulterrp);
1481 } else
1482 return (err & 0xffff);
1483 }
1484 if (err <= ELAST)
1485 return ((int)nfsrv_v2errmap[err - 1]);
1486 return (NFSERR_IO);
1487}
1488
984263bc
MD
/*
 * Sort a group list in place into increasing numerical order.
 * (The insertion sort is due to Chris Torek, who was grossed out by the
 * bubble sort that used to be here.)
 *
 * list - array of group ids to sort
 * num  - number of entries in list; values below 2 leave it untouched
 */
void
nfsrvw_sort(gid_t *list, int num)
{
	int sorted, slot;
	gid_t key;

	/* list[0 .. sorted-1] is already in order; insert list[sorted]. */
	for (sorted = 1; sorted < num; sorted++) {
		key = list[sorted];
		slot = sorted;
		/* Shift larger entries up one place to open a gap for key. */
		while (slot > 0 && key < list[slot - 1]) {
			list[slot] = list[slot - 1];
			slot--;
		}
		list[slot] = key;
	}
}
1509
1510/*
1511 * copy credentials making sure that the result can be compared with bcmp().
1512 */
1513void
e851b29e 1514nfsrv_setcred(struct ucred *incred, struct ucred *outcred)
984263bc 1515{
40393ded 1516 int i;
984263bc
MD
1517
1518 bzero((caddr_t)outcred, sizeof (struct ucred));
1519 outcred->cr_ref = 1;
1520 outcred->cr_uid = incred->cr_uid;
1521 outcred->cr_ngroups = incred->cr_ngroups;
1522 for (i = 0; i < incred->cr_ngroups; i++)
1523 outcred->cr_groups[i] = incred->cr_groups[i];
1524 nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups);
1525}
1526#endif /* NFS_NOSERVER */