socket: introduce SO_RERROR to detect receive buffer overflow
author Roy Marples <roy@marples.name>
Wed, 7 Aug 2019 00:48:21 +0000 (01:48 +0100)
committer Roy Marples <roy@marples.name>
Wed, 7 Aug 2019 00:48:21 +0000 (01:48 +0100)
Kernel receive buffers are initially of a limited size, and
generally the network protocols that use them don't care
if a packet gets lost.

However, some users do care about lost messages even if loss
detection is not baked into the protocol - such as consumers
of route(4), which rely on its messages to track state.

POSIX states that read(2) can return an error of ENOBUFS so
return this error code when an overflow is detected.
Guard this with socket option SO_RERROR so that existing
applications which do not care can carry on not caring by
default.

Taken-from: NetBSD
Reviewed-by: sephe
15 files changed:
lib/libc/sys/getsockopt.2
sys/kern/uipc_socket.c
sys/kern/uipc_socket2.c
sys/net/ip_mroute/ip_mroute.c
sys/net/raw_usrreq.c
sys/netgraph/socket/ng_socket.c
sys/netinet/ip_divert.c
sys/netinet/raw_ip.c
sys/netinet/udp_usrreq.c
sys/netinet6/icmp6.c
sys/netinet6/ip6_mroute.c
sys/netinet6/raw_ip6.c
sys/netinet6/udp6_usrreq.c
sys/sys/socket.h
sys/sys/socketvar.h

index 68d2fc5..48e68b8 100644 (file)
@@ -28,7 +28,7 @@
 .\"     @(#)getsockopt.2       8.4 (Berkeley) 5/2/95
 .\" $FreeBSD: src/lib/libc/sys/getsockopt.2,v 1.12.2.11 2002/01/09 17:44:15 yar Exp $
 .\"
-.Dd May 2, 1995
+.Dd August 7, 2019
 .Dt GETSOCKOPT 2
 .Os
 .Sh NAME
@@ -152,6 +152,7 @@ and set with
 .It Dv SO_ACCEPTFILTER Ta "set accept filter on listening socket"
 .It Dv SO_TYPE Ta "get the type of the socket (get only)"
 .It Dv SO_ERROR Ta "get and clear error on the socket (get only)"
+.It Dv SO_RERROR Ta "enables receive error reporting"
 .El
 .Pp
 .Dv SO_DEBUG
@@ -180,6 +181,14 @@ indicates that outgoing messages should
 bypass the standard routing facilities.  Instead, messages are directed
 to the appropriate network interface according to the network portion
 of the destination address.
+.Pp
+.Dv SO_RERROR
+indicates that receive buffer overflows should be handled as errors.
+Historically receive buffer overflows have been ignored and programs
+could not tell if they missed messages or messages had been truncated
+because of overflows.
+Since programs historically do not expect to get receive overflow errors,
+this behavior is not the default.
 .Pp
 .Dv SO_LINGER
 controls the action taken when unsent messages
index 934cbb6..765ca5d 100644 (file)
@@ -1341,12 +1341,19 @@ restart:
            ((flags & MSG_WAITALL) && resid <= (size_t)so->so_rcv.ssb_hiwat)) &&
            m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
                KASSERT(m != NULL || !so->so_rcv.ssb_cc, ("receive 1"));
-               if (so->so_error) {
+               if (so->so_error || so->so_rerror) {
                        if (m)
                                goto dontblock;
-                       error = so->so_error;
-                       if ((flags & MSG_PEEK) == 0)
-                               so->so_error = 0;
+                       if (so->so_error)
+                               error = so->so_error;
+                       else
+                               error = so->so_rerror;
+                       if ((flags & MSG_PEEK) == 0) {
+                               if (so->so_error)
+                                       so->so_error = 0;
+                               else
+                                       so->so_rerror = 0;
+                       }
                        goto release;
                }
                if (so->so_state & SS_CANTRCVMORE) {
@@ -1539,7 +1546,8 @@ dontblock:
                while ((flags & MSG_WAITALL) && m == NULL && 
                       resid > 0 && !sosendallatonce(so) && 
                       so->so_rcv.ssb_mb == NULL) {
-                       if (so->so_error || so->so_state & SS_CANTRCVMORE)
+                       if (so->so_error || so->so_rerror ||
+                           so->so_state & SS_CANTRCVMORE)
                                break;
                        /*
                         * The window might have closed to zero, make
@@ -2136,6 +2144,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                case SO_OOBINLINE:
                case SO_TIMESTAMP:
                case SO_NOSIGPIPE:
+               case SO_RERROR:
                        error = sooptcopyin(sopt, &optval, sizeof optval,
                                            sizeof optval);
                        if (error)
@@ -2334,6 +2343,7 @@ sogetopt(struct socket *so, struct sockopt *sopt)
                case SO_OOBINLINE:
                case SO_TIMESTAMP:
                case SO_NOSIGPIPE:
+               case SO_RERROR:
                        optval = so->so_options & sopt->sopt_name;
 integer:
                        error = sooptcopyout(sopt, &optval, sizeof optval);
@@ -2344,8 +2354,13 @@ integer:
                        goto integer;
 
                case SO_ERROR:
-                       optval = so->so_error;
-                       so->so_error = 0;
+                       if (so->so_error) {
+                               optval = so->so_error;
+                               so->so_error = 0;
+                       } else {
+                               optval = so->so_rerror;
+                               so->so_rerror = 0;
+                       }
                        goto integer;
 
                case SO_SNDBUF:
@@ -2572,7 +2587,7 @@ filt_soread(struct knote *kn, long hint __unused)
                kn->kn_fflags = so->so_error;
                return (1);
        }
-       if (so->so_error)       /* temporary udp error */
+       if (so->so_error || so->so_rerror)
                return (1);
        if (kn->kn_sfflags & NOTE_LOWAT)
                return (kn->kn_data >= kn->kn_sdata);
index 6050851..5ec28d7 100644 (file)
@@ -522,6 +522,19 @@ socantrcvmore(struct socket *so)
        sorwakeup(so);
 }
 
+/*
+ * soroverflow(): called when a message was dropped because the receive
+ * buffer overflowed.  If SO_RERROR is set, latch ENOBUFS and wake readers.
+ */
+void
+soroverflow(struct socket *so)
+{
+       if (so->so_options & SO_RERROR) {
+               so->so_rerror = ENOBUFS;
+               sorwakeup(so);
+       }
+}
+
 /*
  * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
  * via SIGIO if the socket has the SS_ASYNC flag set.
index 8ec3d6a..f7a6486 100644 (file)
@@ -1130,7 +1130,8 @@ socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
        if (ssb_appendaddr(&s->so_rcv, (struct sockaddr *)src, mm, NULL) != 0) {
            sorwakeup(s);
            return 0;
-       }
+       } else
+           soroverflow(s);
     }
     m_freem(mm);
     return -1;
@@ -2198,6 +2199,7 @@ X_rsvp_input(struct mbuf **mp, int *offp, int proto)
        m_freem(m);
        if (opts)
            m_freem(opts);
+       soroverflow(so);
        if (rsvpdebug)
            kprintf("rsvp_input: Failed to append to socket\n");
     }
index 94d3ecb..9d3186b 100644 (file)
@@ -108,8 +108,8 @@ raw_input(struct mbuf *m0, const struct sockproto *proto,
                                lwkt_gettoken(&last->so_rcv.ssb_token);
                                if (ssb_appendaddr(&last->so_rcv, src, n,
                                                 NULL) == 0) {
-                                       /* should notify about lost packet */
                                        m_freem(n);
+                                       soroverflow(last);
                                } else {
                                        sorwakeup(last);
                                }
@@ -120,9 +120,10 @@ raw_input(struct mbuf *m0, const struct sockproto *proto,
        }
        if (last) {
                lwkt_gettoken(&last->so_rcv.ssb_token);
-               if (ssb_appendaddr(&last->so_rcv, src, m, NULL) == 0)
+               if (ssb_appendaddr(&last->so_rcv, src, m, NULL) == 0) {
                        m_freem(m);
-               else
+                       soroverflow(last);
+               } else
                        sorwakeup(last);
                lwkt_reltoken(&last->so_rcv.ssb_token);
        } else {
index c546bde..6487908 100644 (file)
@@ -747,6 +747,7 @@ ship_msg(struct ngpcb *pcbp, struct ng_mesg *msg, struct sockaddr_ng *addr)
        lwkt_gettoken(&so->so_rcv.ssb_token);
        if (ssb_appendaddr(&so->so_rcv,
            (struct sockaddr *) addr, mdata, NULL) == 0) {
+               soroverflow(so);
                lwkt_reltoken(&so->so_rcv.ssb_token);
                TRAP_ERROR;
                m_freem(mdata);
@@ -868,6 +869,7 @@ ngs_rcvdata(hook_p hook, struct mbuf *m, meta_p meta)
        /* Try to tell the socket which hook it came in on */
        lwkt_gettoken(&so->so_rcv.ssb_token);
        if (ssb_appendaddr(&so->so_rcv, (struct sockaddr *) addr, m, NULL) == 0) {
+               soroverflow(so);
                lwkt_reltoken(&so->so_rcv.ssb_token);
                m_freem(m);
                TRAP_ERROR;
index 410f834..186f122 100644 (file)
@@ -239,9 +239,10 @@ div_packet(struct mbuf *m, int incoming, int port)
        }
        if (sa) {
                lwkt_gettoken(&sa->so_rcv.ssb_token);
-               if (ssb_appendaddr(&sa->so_rcv, (struct sockaddr *)&divsrc, m, NULL) == 0)
+               if (ssb_appendaddr(&sa->so_rcv, (struct sockaddr *)&divsrc, m, NULL) == 0) {
                        m_freem(m);
-               else
+                       soroverflow(sa);
+               } else
                        sorwakeup(sa);
                lwkt_reltoken(&sa->so_rcv.ssb_token);
        } else {
index 4faf6ca..922d7f6 100644 (file)
@@ -172,10 +172,10 @@ rip_input(struct mbuf **mp, int *offp, int proto)
                                if (ssb_appendaddr(&last->inp_socket->so_rcv,
                                            (struct sockaddr *)&ripsrc, n,
                                            opts) == 0) {
-                                       /* should notify about lost packet */
                                        m_freem(n);
                                        if (opts)
                                            m_freem(opts);
+                                       soroverflow(last->inp_socket);
                                } else {
                                        sorwakeup(last->inp_socket);
                                }
@@ -199,6 +199,7 @@ rip_input(struct mbuf **mp, int *offp, int proto)
                        m_freem(m);
                        if (opts)
                            m_freem(opts);
+                       soroverflow(last->inp_socket);
                } else {
                        sorwakeup(last->inp_socket);
                }
index f1ecb85..21f07f9 100644 (file)
@@ -635,6 +635,7 @@ done:
            (struct sockaddr *)&udp_in, m, opts) == 0) {
                lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
                udp_stat.udps_fullsock++;
+               soroverflow(inp->inp_socket);
                goto bad;
        }
        lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
index e5840f7..2f43361 100644 (file)
@@ -1861,10 +1861,10 @@ icmp6_rip6_input(struct mbuf **mp, int  off)
                                lwkt_gettoken(&so->so_rcv.ssb_token);
                                if (ssb_appendaddr(&so->so_rcv,
                                    (struct sockaddr *)&fromsa, n, opts) == 0) {
-                                       /* should notify about lost packet */
                                        m_freem(n);
                                        if (opts)
                                                m_freem(opts);
+                                       soroverflow(so);
                                } else {
                                        sorwakeup(so);
                                }
@@ -1888,6 +1888,7 @@ icmp6_rip6_input(struct   mbuf **mp, int  off)
                        m_freem(m);
                        if (opts)
                                m_freem(opts);
+                       soroverflow(so);
                } else {
                        sorwakeup(so);
                }
index ff91a35..b521148 100644 (file)
@@ -921,7 +921,8 @@ socket_send(struct socket *so, struct mbuf *mm, struct sockaddr_in6 *src)
                        sorwakeup(so);
                        lwkt_reltoken(&so->so_rcv.ssb_token);
                        return 0;
-               }
+               } else
+                       soroverflow(so);
                lwkt_reltoken(&so->so_rcv.ssb_token);
        }
        m_freem(mm);
index 0572a17..a7296c0 100644 (file)
@@ -175,6 +175,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
                                        if (opts)
                                                m_freem(opts);
                                        rip6stat.rip6s_fullsock++;
+                                       soroverflow(so);
                                } else {
                                        sorwakeup(so);
                                }
@@ -201,6 +202,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
                        if (opts)
                                m_freem(opts);
                        rip6stat.rip6s_fullsock++;
+                       soroverflow(so);
                } else {
                        sorwakeup(so);
                }
index 2164964..d75554d 100644 (file)
@@ -269,6 +269,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
                                                if (opts)
                                                        m_freem(opts);
                                                udp_stat.udps_fullsock++;
+                                               soroverflow(so);
                                        } else {
                                                sorwakeup(so);
                                        }
@@ -313,6 +314,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
                if (ssb_appendaddr(&so->so_rcv, (struct sockaddr *)&udp_in6,
                                   m, opts) == 0) {
                        udp_stat.udps_fullsock++;
+                       soroverflow(so);
                        lwkt_reltoken(&so->so_rcv.ssb_token);
                        goto bad;
                }
@@ -361,6 +363,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
        if (ssb_appendaddr(&so->so_rcv, (struct sockaddr *)&udp_in6,
                           m, opts) == 0) {
                udp_stat.udps_fullsock++;
+               soroverflow(so);
                lwkt_reltoken(&so->so_rcv.ssb_token);
                goto bad;
        }
index be5372d..07ed458 100644 (file)
@@ -99,6 +99,7 @@ typedef __socklen_t   socklen_t;
 #define        SO_TIMESTAMP    0x0400          /* timestamp received dgram traffic */
 #define        SO_NOSIGPIPE    0x0800          /* no SIGPIPE from EPIPE */
 #define        SO_ACCEPTFILTER 0x1000          /* there is an accept filter */
+#define        SO_RERROR       0x2000          /* Keep track of receive errors */
 
 /*
  * Additional options, not kept in so_options.
index 39c1681..dcad53c 100644 (file)
@@ -139,6 +139,7 @@ struct socket {
         */
        short   so_timeo;               /* connection timeout */
        u_short so_error;               /* error affecting connection */
+       u_short so_rerror;              /* error affecting receiving */
        struct  sigio *so_sigio;        /* information for async I/O or
                                           out of band data (SIGURG) */
        u_long  so_oobmark;             /* chars to oob mark */
@@ -425,6 +426,7 @@ struct      socket *soalloc (int waitok, struct protosw *);
 int    sobind (struct socket *so, struct sockaddr *nam, struct thread *td);
 void   socantrcvmore (struct socket *so);
 void   socantsendmore (struct socket *so);
+void   soroverflow(struct socket *so);
 int    socket_wait (struct socket *so, struct timespec *ts, int *res);
 int    soclose (struct socket *so, int fflags);
 int    soconnect (struct socket *so, struct sockaddr *nam, struct thread *td,