Netgraph port from FreeBSD - initial porting work
[dragonfly.git] / sys / netgraph7 / ng_tcpmss.c
1 /*-
2  * ng_tcpmss.c
3  *
4  * Copyright (c) 2004, Alexey Popov <lollypop@flexuser.ru>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * This software includes fragments of the following programs:
30  *      tcpmssd         Ruslan Ermilov <ru@FreeBSD.org>
31  *
32  * $FreeBSD: src/sys/netgraph/ng_tcpmss.c,v 1.4 2007/01/15 05:01:31 glebius Exp $
33  * $DragonFly: src/sys/netgraph7/ng_tcpmss.c,v 1.2 2008/06/26 23:05:35 dillon Exp $
34  */
35
/*
 * This node is a netgraph tool that works around Path MTU Discovery
 * (PMTUD) problems. It acts as a filter for IP packets: when configured,
 * it reduces the MSS advertised in TCP SYN packets.
 *
 * Configuration is done by sending an NGM_TCPMSS_CONFIG message. The
 * message sets up a filter for packets arriving on hook 'inHook': the
 * TCP MSS field of such a packet is lowered to the 'maxMSS' parameter
 * and the resulting packet is sent out 'outHook'.
 *
 * XXX: statistics are not updated atomically, so they may break on SMP.
 */
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/errno.h>
52 #include <sys/kernel.h>
53 #include <sys/malloc.h>
54 #include <sys/mbuf.h>
55
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60
61 #include "ng_message.h"
62 #include "netgraph.h"
63 #include "ng_parse.h"
64 #include "ng_tcpmss.h"
65
66 /* Per hook info. */
67 typedef struct {
68         hook_p                          outHook;
69         struct ng_tcpmss_hookstat       stats;
70 } *hpriv_p;
71
72 /* Netgraph methods. */
73 static ng_constructor_t ng_tcpmss_constructor;
74 static ng_rcvmsg_t      ng_tcpmss_rcvmsg;
75 static ng_newhook_t     ng_tcpmss_newhook;
76 static ng_rcvdata_t     ng_tcpmss_rcvdata;
77 static ng_disconnect_t  ng_tcpmss_disconnect;
78
79 static int correct_mss(struct tcphdr *, int, uint16_t, int);
80
81 /* Parse type for struct ng_tcpmss_hookstat. */
82 static const struct ng_parse_struct_field ng_tcpmss_hookstat_type_fields[]
83         = NG_TCPMSS_HOOKSTAT_INFO;
84 static const struct ng_parse_type ng_tcpmss_hookstat_type = {
85         &ng_parse_struct_type,
86         &ng_tcpmss_hookstat_type_fields
87 };
88
89 /* Parse type for struct ng_tcpmss_config. */
90 static const struct ng_parse_struct_field ng_tcpmss_config_type_fields[]
91         = NG_TCPMSS_CONFIG_INFO;
92 static const struct ng_parse_type ng_tcpmss_config_type = {
93         &ng_parse_struct_type,
94         ng_tcpmss_config_type_fields
95 };
96
97 /* List of commands and how to convert arguments to/from ASCII. */
98 static const struct ng_cmdlist ng_tcpmss_cmds[] = {
99         {
100           NGM_TCPMSS_COOKIE,
101           NGM_TCPMSS_GET_STATS,
102           "getstats",
103           &ng_parse_hookbuf_type,
104           &ng_tcpmss_hookstat_type
105         },
106         {
107           NGM_TCPMSS_COOKIE,
108           NGM_TCPMSS_CLR_STATS,
109           "clrstats",
110           &ng_parse_hookbuf_type,
111           NULL
112         },
113         {
114           NGM_TCPMSS_COOKIE,
115           NGM_TCPMSS_GETCLR_STATS,
116           "getclrstats",
117           &ng_parse_hookbuf_type,
118           &ng_tcpmss_hookstat_type
119         },
120         {
121           NGM_TCPMSS_COOKIE,
122           NGM_TCPMSS_CONFIG,
123           "config",
124           &ng_tcpmss_config_type,
125           NULL
126         },
127         { 0 }
128 };
129
130 /* Netgraph type descriptor. */
131 static struct ng_type ng_tcpmss_typestruct = {
132         .version =      NG_ABI_VERSION,
133         .name =         NG_TCPMSS_NODE_TYPE,
134         .constructor =  ng_tcpmss_constructor,
135         .rcvmsg =       ng_tcpmss_rcvmsg,
136         .newhook =      ng_tcpmss_newhook,
137         .rcvdata =      ng_tcpmss_rcvdata,
138         .disconnect =   ng_tcpmss_disconnect,
139         .cmdlist =      ng_tcpmss_cmds,
140 };
141
142 NETGRAPH_INIT(tcpmss, &ng_tcpmss_typestruct);
143
/*
 * Store error code 'x' in the local variable 'error' and jump to the
 * enclosing function's 'done' label.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement: "if (cond) ERROUT(e); else ..." then parses as intended.
 */
#define ERROUT(x)	do { error = (x); goto done; } while (0)
145
146 /*
147  * Node constructor. No special actions required.
148  */
149 static int
150 ng_tcpmss_constructor(node_p node)
151 {
152         return (0);
153 }
154
155 /*
156  * Add a hook. Any unique name is OK.
157  */
158 static int
159 ng_tcpmss_newhook(node_p node, hook_p hook, const char *name)
160 {
161         hpriv_p priv;
162
163         MALLOC(priv, hpriv_p, sizeof(*priv), M_NETGRAPH, M_WAITOK | M_NULLOK | M_ZERO);
164         if (priv == NULL)
165                 return (ENOMEM);
166
167         NG_HOOK_SET_PRIVATE(hook, priv);
168
169         return (0);
170 }
171
172 /*
173  * Receive a control message.
174  */
175 static int
176 ng_tcpmss_rcvmsg
177 (node_p node, item_p item, hook_p lasthook)
178 {
179         struct ng_mesg *msg, *resp = NULL;
180         int error = 0;
181
182         NGI_GET_MSG(item, msg);
183
184         switch (msg->header.typecookie) {
185         case NGM_TCPMSS_COOKIE:
186                 switch (msg->header.cmd) {
187                 case NGM_TCPMSS_GET_STATS:
188                 case NGM_TCPMSS_CLR_STATS:
189                 case NGM_TCPMSS_GETCLR_STATS:
190                     {
191                         hook_p hook;
192                         hpriv_p priv;
193
194                         /* Check that message is long enough. */
195                         if (msg->header.arglen != NG_HOOKSIZ)
196                                 ERROUT(EINVAL);
197
198                         /* Find this hook. */
199                         hook = ng_findhook(node, (char *)msg->data);
200                         if (hook == NULL)
201                                 ERROUT(ENOENT);
202
203                         priv = NG_HOOK_PRIVATE(hook);
204
205                         /* Create response. */
206                         if (msg->header.cmd != NGM_TCPMSS_CLR_STATS) {
207                                 NG_MKRESPONSE(resp, msg,
208                                     sizeof(struct ng_tcpmss_hookstat), M_WAITOK | M_NULLOK);
209                                 if (resp == NULL)
210                                         ERROUT(ENOMEM);
211                                 bcopy(&priv->stats, resp->data,
212                                     sizeof(struct ng_tcpmss_hookstat)); 
213                         }
214
215                         if (msg->header.cmd != NGM_TCPMSS_GET_STATS)
216                                 bzero(&priv->stats,
217                                     sizeof(struct ng_tcpmss_hookstat));
218                         break;
219                     }
220                 case NGM_TCPMSS_CONFIG:
221                     {
222                         struct ng_tcpmss_config *set;
223                         hook_p in, out;
224                         hpriv_p priv;
225
226                         /* Check that message is long enough. */
227                         if (msg->header.arglen !=
228                             sizeof(struct ng_tcpmss_config))
229                                 ERROUT(EINVAL);
230
231                         set = (struct ng_tcpmss_config *)msg->data;
232                         in = ng_findhook(node, set->inHook);
233                         out = ng_findhook(node, set->outHook);
234                         if (in == NULL || out == NULL)
235                                 ERROUT(ENOENT);
236
237                         /* Configure MSS hack. */
238                         priv = NG_HOOK_PRIVATE(in);
239                         priv->outHook = out;
240                         priv->stats.maxMSS = set->maxMSS;
241
242                         break;
243                     }
244                 default:
245                         error = EINVAL;
246                         break;
247                 }
248                 break;
249         default:
250                 error = EINVAL;
251                 break;
252         }
253
254 done:
255         NG_RESPOND_MSG(error, node, item, resp);
256         NG_FREE_MSG(msg);
257
258         return (error);
259 }
260
261 /*
262  * Receive data on a hook, and hack MSS.
263  *
264  */
265 static int
266 ng_tcpmss_rcvdata(hook_p hook, item_p item)
267 {
268         hpriv_p priv = NG_HOOK_PRIVATE(hook);
269         struct mbuf *m = NULL;
270         struct ip *ip;
271         struct tcphdr *tcp;
272         int iphlen, tcphlen, pktlen;
273         int pullup_len = 0;
274         int error = 0;
275
276         /* Drop packets if filter is not configured on this hook. */
277         if (priv->outHook == NULL)
278                 goto done;
279
280         NGI_GET_M(item, m);
281
282         /* Update stats on incoming hook. */
283         pktlen = m->m_pkthdr.len;
284         priv->stats.Octets += pktlen;
285         priv->stats.Packets++;
286
287         /* Check whether we configured to fix MSS. */
288         if (priv->stats.maxMSS == 0)
289                 goto send;
290
291 #define M_CHECK(length) do {                                    \
292         pullup_len += length;                                   \
293         if ((m)->m_pkthdr.len < pullup_len)                     \
294                 goto send;                                      \
295         if ((m)->m_len < pullup_len &&                          \
296            (((m) = m_pullup((m), pullup_len)) == NULL))         \
297                 ERROUT(ENOBUFS);                                \
298         } while (0)
299
300         /* Check mbuf packet size and arrange for IP header. */
301         M_CHECK(sizeof(struct ip));
302         ip = mtod(m, struct ip *);
303
304         /* Check IP version. */
305         if (ip->ip_v != IPVERSION)
306                 ERROUT(EINVAL);
307
308         /* Check IP header length. */
309         iphlen = ip->ip_hl << 2;
310         if (iphlen < sizeof(struct ip) || iphlen > pktlen )
311                 ERROUT(EINVAL);
312
313         /* Check if it is TCP. */
314         if (!(ip->ip_p == IPPROTO_TCP))
315                 goto send;
316
317         /* Check mbuf packet size and arrange for IP+TCP header */
318         M_CHECK(iphlen - sizeof(struct ip) + sizeof(struct tcphdr));
319         ip = mtod(m, struct ip *);
320         tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
321
322         /* Check TCP header length. */
323         tcphlen = tcp->th_off << 2;
324         if (tcphlen < sizeof(struct tcphdr) || tcphlen > pktlen - iphlen)
325                 ERROUT(EINVAL);
326
327         /* Check SYN packet and has options. */
328         if (!(tcp->th_flags & TH_SYN) || tcphlen == sizeof(struct tcphdr))
329                 goto send;
330         
331         /* Update SYN stats. */
332         priv->stats.SYNPkts++;
333
334         M_CHECK(tcphlen - sizeof(struct tcphdr));
335         ip = mtod(m, struct ip *);
336         tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
337
338 #undef  M_CHECK
339
340         /* Fix MSS and update stats. */
341         if (correct_mss(tcp, tcphlen, priv->stats.maxMSS,
342             m->m_pkthdr.csum_flags))
343                 priv->stats.FixedPkts++;
344
345 send:
346         /* Deliver frame out destination hook. */
347         NG_FWD_NEW_DATA(error, item, priv->outHook, m);
348
349         return (error);
350
351 done:
352         NG_FREE_ITEM(item);
353         NG_FREE_M(m);
354
355         return (error);
356 }
357
358 /*
359  * Hook disconnection.
360  * We must check all hooks, since they may reference this one.
361  */
362 static int
363 ng_tcpmss_disconnect(hook_p hook)
364 {
365         node_p node = NG_HOOK_NODE(hook);
366         hook_p hook2;
367
368         LIST_FOREACH(hook2, &node->nd_hooks, hk_hooks) {
369                 hpriv_p priv = NG_HOOK_PRIVATE(hook2);
370
371                 if (priv->outHook == hook)
372                         priv->outHook = NULL;
373         }
374
375         FREE(NG_HOOK_PRIVATE(hook), M_NETGRAPH);
376
377         if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0)
378                 ng_rmnode_self(NG_HOOK_NODE(hook));
379
380         return (0);
381 }
382
383 /*
384  * Code from tcpmssd.
385  */
386
/*-
 * The following macro is used to update an
 * internet checksum.  "acc" is a 32-bit
 * accumulation of all the changes to the
 * checksum (adding in old 16-bit words and
 * subtracting out new words), and "cksum"
 * is the checksum value to be updated.
 *
 * "acc" is used as scratch space and is modified; both arguments must
 * be lvalues and are evaluated more than once.  The expansion is a
 * single statement without a trailing semicolon, so the caller supplies
 * the ';' and the macro is safe as the body of an unbraced if/else.
 */
#define TCPMSS_ADJUST_CHECKSUM(acc, cksum) do {			\
	(acc) += (cksum);						\
	if ((acc) < 0) {						\
		/* Fold the magnitude, then take its complement. */	\
		(acc) = -(acc);						\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);		\
		(acc) += (acc) >> 16;					\
		(cksum) = (uint16_t) ~(acc);				\
	} else {							\
		/* Fold the carries back into the low 16 bits. */	\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);		\
		(acc) += (acc) >> 16;					\
		(cksum) = (uint16_t) (acc);				\
	}								\
} while (0)
408
409 static int
410 correct_mss(struct tcphdr *tc, int hlen, uint16_t maxmss, int flags)
411 {
412         int olen, optlen;
413         u_char *opt;
414         uint16_t *mss;
415         int accumulate;
416         int res = 0;
417
418         for (olen = hlen - sizeof(struct tcphdr), opt = (u_char *)(tc + 1);
419              olen > 0; olen -= optlen, opt += optlen) {
420                 if (*opt == TCPOPT_EOL)
421                         break;
422                 else if (*opt == TCPOPT_NOP)
423                         optlen = 1;
424                 else {
425                         optlen = *(opt + 1);
426                         if (optlen <= 0 || optlen > olen)
427                                 break;
428                         if (*opt == TCPOPT_MAXSEG) {
429                                 if (optlen != TCPOLEN_MAXSEG)
430                                         continue;
431                                 mss = (uint16_t *)(opt + 2);
432                                 if (ntohs(*mss) > maxmss) {
433                                         accumulate = *mss;
434                                         *mss = htons(maxmss);
435                                         accumulate -= *mss;
436                                         if ((flags & CSUM_TCP) == 0)
437                                                 TCPMSS_ADJUST_CHECKSUM(accumulate, tc->th_sum);
438                                         res = 1;
439                                 }
440                         }
441                 }
442         }
443         return (res);
444 }