kernel - Fix NFS panic
[dragonfly.git] / sys / vfs / nfs / nfs_iod.c
1 /*
2  * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * NFSIOD operations - now built into the kernel.
36  */
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/proc.h>
40 #include <sys/malloc.h>
41 #include <sys/mount.h>
42 #include <sys/kernel.h>
43 #include <sys/mbuf.h>
44 #include <sys/vnode.h>
45 #include <sys/fcntl.h>
46 #include <sys/protosw.h>
47 #include <sys/resourcevar.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/socketops.h>
51 #include <sys/syslog.h>
52 #include <sys/thread.h>
53 #include <sys/tprintf.h>
54 #include <sys/sysctl.h>
55 #include <sys/signalvar.h>
56 #include <sys/mutex.h>
57
58 #include <sys/signal2.h>
59 #include <sys/thread2.h>
60 #include <sys/mutex2.h>
61
62 #include <netinet/in.h>
63 #include <netinet/tcp.h>
64
65 #include "rpcv2.h"
66 #include "nfsproto.h"
67 #include "nfs.h"
68 #include "xdr_subs.h"
69 #include "nfsm_subs.h"
70 #include "nfsmount.h"
71 #include "nfsnode.h"
72 #include "nfsrtt.h"
73
74 /*
75  * nfs service connection reader thread
76  */
77 void
78 nfssvc_iod_reader(void *arg)
79 {
80         struct nfsmount *nmp = arg;
81         struct nfsm_info *info;
82         struct nfsreq *req;
83         int error;
84
85         lwkt_gettoken(&nmp->nm_token);
86
87         if (nmp->nm_rxstate == NFSSVC_INIT)
88                 nmp->nm_rxstate = NFSSVC_PENDING;
89         for (;;) {
90                 if (nmp->nm_rxstate == NFSSVC_WAITING) {
91                         if (TAILQ_FIRST(&nmp->nm_reqq) == NULL &&
92                             TAILQ_FIRST(&nmp->nm_reqrxq) == NULL) {
93                                 tsleep(&nmp->nm_rxstate, 0, "nfsidl", 0);
94                         } else {
95                                 /*
96                                  * This can happen during shutdown, we don't
97                                  * want to hardloop.
98                                  */
99                                 error = nfs_reply(nmp, NULL);
100                                 if (error && error != EWOULDBLOCK) {
101                                         tsleep(&nmp->nm_rxstate, 0,
102                                                 "nfsxxx", hz / 10);
103                                 }
104                         }
105                         continue;
106                 }
107                 if (nmp->nm_rxstate != NFSSVC_PENDING)
108                         break;
109                 nmp->nm_rxstate = NFSSVC_WAITING;
110
111                 /*
112                  * Process requests which have received replies.  Only
113                  * process the post-reply states.  If we get EINPROGRESS
114                  * it means the request went back to an auth or retransmit
115                  * state and we let the iod_writer thread deal with it.
116                  *
117                  * Any lock on the request is strictly temporary due to
118                  * MP races (XXX).
119                  *
120                  * If the request completes we run the info->done call
121                  * to finish up the I/O.
122                  */
123                 while ((req = TAILQ_FIRST(&nmp->nm_reqrxq)) != NULL) {
124                         if (req->r_flags & R_LOCKED) {
125                                 while (req->r_flags & R_LOCKED) {
126                                         req->r_flags |= R_WANTED;
127                                         tsleep(req, 0, "nfstrac", 0);
128                                 }
129                                 continue;
130                         }
131                         TAILQ_REMOVE(&nmp->nm_reqrxq, req, r_chain);
132                         info = req->r_info;
133                         KKASSERT(info);
134                         info->error = nfs_request(info,
135                                                   NFSM_STATE_PROCESSREPLY,
136                                                   NFSM_STATE_DONE);
137                         if (info->error == EINPROGRESS) {
138                                 kprintf("rxq: move info %p back to txq\n", info);
139                                 TAILQ_INSERT_TAIL(&nmp->nm_reqtxq, req, r_chain);
140                                 nfssvc_iod_writer_wakeup(nmp);
141                         } else {
142                                 atomic_subtract_int(&nmp->nm_bioqlen, 1);
143                                 info->done(info);
144                         }
145                 }
146         }
147         nmp->nm_rxthread = NULL;
148         nmp->nm_rxstate = NFSSVC_DONE;
149
150         lwkt_reltoken(&nmp->nm_token);
151         wakeup(&nmp->nm_rxthread);
152 }
153
154 /*
155  * nfs service connection writer thread
156  *
157  * The writer sits on the send side of the client's socket and
158  * does both the initial processing of BIOs and also transmission
159  * and retransmission of nfsreq's.
160  *
161  * The writer processes both new BIOs from nm_bioq and retransmit
162  * or state machine jumpbacks from nm_reqtxq
163  */
164 void
165 nfssvc_iod_writer(void *arg)
166 {
167         struct nfsmount *nmp = arg;
168         struct bio *bio;
169         struct nfsreq *req;
170         struct vnode *vp;
171         nfsm_info_t info;
172
173         lwkt_gettoken(&nmp->nm_token);
174
175         if (nmp->nm_txstate == NFSSVC_INIT)
176                 nmp->nm_txstate = NFSSVC_PENDING;
177
178         for (;;) {
179                 if (nmp->nm_txstate == NFSSVC_WAITING) {
180                         tsleep(&nmp->nm_txstate, 0, "nfsidl", 0);
181                         continue;
182                 }
183                 if (nmp->nm_txstate != NFSSVC_PENDING)
184                         break;
185                 nmp->nm_txstate = NFSSVC_WAITING;
186
187                 /*
188                  * Eep, we could blow out the mbuf allocator if we just
189                  * did everything the kernel wanted us to do.
190                  */
191                 while ((bio = TAILQ_FIRST(&nmp->nm_bioq)) != NULL) {
192                         if (nmp->nm_reqqlen > nfs_maxasyncbio)
193                                 break;
194                         TAILQ_REMOVE(&nmp->nm_bioq, bio, bio_act);
195                         vp = bio->bio_driver_info;
196                         nfs_startio(vp, bio, NULL);
197                 }
198
199                 /*
200                  * Process reauths & retransmits.  If we get an EINPROGRESS
201                  * it means the state transitioned to WAITREPLY or later.
202                  * Otherwise the request completed (probably with an error
203                  * since we didn't get to a replied state).
204                  */
205                 while ((req = TAILQ_FIRST(&nmp->nm_reqtxq)) != NULL) {
206                         TAILQ_REMOVE(&nmp->nm_reqtxq, req, r_chain);
207                         info = req->r_info;
208                         KKASSERT(info);
209                         info->error = nfs_request(info,
210                                                   NFSM_STATE_AUTH,
211                                                   NFSM_STATE_WAITREPLY);
212                         if (info->error == EINPROGRESS) {
213                                 ;
214                         } else {
215                                 atomic_subtract_int(&nmp->nm_bioqlen, 1);
216                                 info->done(info);
217                         }
218                 }
219         }
220         nmp->nm_txthread = NULL;
221         nmp->nm_txstate = NFSSVC_DONE;
222         lwkt_reltoken(&nmp->nm_token);
223         wakeup(&nmp->nm_txthread);
224 }
225
226 void
227 nfssvc_iod_stop1(struct nfsmount *nmp)
228 {
229         nmp->nm_txstate = NFSSVC_STOPPING;
230         nmp->nm_rxstate = NFSSVC_STOPPING;
231 }
232
233 void
234 nfssvc_iod_stop2(struct nfsmount *nmp)
235 {
236         wakeup(&nmp->nm_txstate);
237         while (nmp->nm_txthread)
238                 tsleep(&nmp->nm_txthread, 0, "nfssttx", hz*2);
239         wakeup(&nmp->nm_rxstate);
240         while (nmp->nm_rxthread)
241                 tsleep(&nmp->nm_rxthread, 0, "nfsstrx", hz*2);
242 }
243
244 void
245 nfssvc_iod_writer_wakeup(struct nfsmount *nmp)
246 {
247         if (nmp->nm_txstate == NFSSVC_WAITING) {
248                 nmp->nm_txstate = NFSSVC_PENDING;
249                 wakeup(&nmp->nm_txstate);
250         }
251 }
252
253 void
254 nfssvc_iod_reader_wakeup(struct nfsmount *nmp)
255 {
256         if (nmp->nm_rxstate == NFSSVC_WAITING) {
257                 nmp->nm_rxstate = NFSSVC_PENDING;
258                 wakeup(&nmp->nm_rxstate);
259         }
260 }