kernel - Deal with inconsistencies between IP aliases and primary IPs
author Matthew Dillon <dillon@apollo.backplane.com>
Fri, 17 Jun 2011 06:03:27 +0000 (23:03 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Fri, 17 Jun 2011 06:03:27 +0000 (23:03 -0700)
These changes allow normal IP aliases with proper network masks to be
ifconfig'd instead of forcing people to use non-obvious /32's for their
IP aliases.  It may also be possible to use overlapping subnets with
this change but this is not tested.

* When ifconfig'ing an interface with aliases using the correct netmask
  instead of a /32, the network route will point to only one of the
  addresses.

  Trying to connect to a local IP alias wound up failing due to the
  mismatch between the network route's interface address (ifaddr)
  entry and the actual ifaddr being requested.  The target was not
  being considered a local address when it was.

* Fix in_addroute()'s detection of local host routes when cloning a
  route: check against all aliases instead of only the one the gateway
  network route happens to point to, and adjust the route entry
  appropriately.

  This fix also properly sets RTF_LOCAL for all such cloned routes
  whereas before RTF_LOCAL was only being set for the interface's
  primary IP.

* Minor syntax adjustments and documentation changes.

Reported-by: Peter Avalos <peter@theshell.com>
sys/netinet/if_ether.c
sys/netinet/in_rmx.c

index 10f2353..b7fab4c 100644 (file)
@@ -191,6 +191,9 @@ arptimer(void *ignored_arg)
 
 /*
  * Parallel to llc_rtrequest.
+ *
+ * Called after a route is successfully added to the tree to fix-up the
+ * route and initiate arp operations if required.
  */
 static void
 arp_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
@@ -285,20 +288,24 @@ arp_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
                }
 #endif
 
+               /*
+                * This fixes up the routing interface for local addresses.
+                * The route is adjusted to point at lo0 and the expiration
+                * timer is disabled.
+                *
+                * NOTE: This prevents locally targeted traffic from going
+                *       out the hardware interface, which is inefficient
+                *       and might not work if the hardware cannot listen
+                *       to its own transmitted packets.   Setting
+                *       net.link.ether.inet.useloopback to 0 will force
+                *       packets for local addresses out the hardware (and
+                *       it is expected to receive its own packet).
+                *
+                * XXX We should just be able to test RTF_LOCAL here instead
+                *     of having to compare IPs.
+                */
                if (SIN(rt_key(rt))->sin_addr.s_addr ==
                    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
-                       /*
-                        * This test used to be
-                        *      if (loif.if_flags & IFF_UP)
-                        * It allowed local traffic to be forced
-                        * through the hardware by configuring the
-                        * loopback down.  However, it causes problems
-                        * during network configuration for boards
-                        * that can't receive packets they send.  It
-                        * is now necessary to clear "useloopback" and
-                        * remove the route to force traffic out to
-                        * the hardware.
-                        */
                        rt->rt_expire = 0;
                        bcopy(IF_LLADDR(rt->rt_ifp), LLADDR(SDL(gate)),
                              SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen);
index f80d96f..635a4dc 100644 (file)
@@ -55,6 +55,7 @@
 
 #include <net/if.h>
 #include <net/route.h>
+#include <net/if_var.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
@@ -74,8 +75,14 @@ in_addroute(char *key, char *mask, struct radix_node_head *head,
        struct rtentry *rt = (struct rtentry *)treenodes;
        struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
        struct radix_node *ret;
+       struct in_ifaddr_container *iac;
+       struct in_ifaddr *ia;
 
        /*
+        * For IP, mark routes to multicast addresses as such, because
+        * it's easy to do and might be useful (but this is much more
+        * dubious since it's so easy to inspect the address).
+        *
         * For IP, all unicast non-host routes are automatically cloning.
         */
        if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
@@ -85,28 +92,39 @@ in_addroute(char *key, char *mask, struct radix_node_head *head,
                rt->rt_flags |= RTF_PRCLONING;
 
        /*
-        * A little bit of help for both IP output and input:
         *   For host routes, we make sure that RTF_BROADCAST
         *   is set for anything that looks like a broadcast address.
         *   This way, we can avoid an expensive call to in_broadcast()
         *   in ip_output() most of the time (because the route passed
         *   to ip_output() is almost always a host route).
         *
-        *   We also do the same for local addresses, with the thought
-        *   that this might one day be used to speed up ip_input().
+        *   For local routes we set RTF_LOCAL allowing various shortcuts.
         *
-        * We also mark routes to multicast addresses as such, because
-        * it's easy to do and might be useful (but this is much more
-        * dubious since it's so easy to inspect the address).  (This
-        * is done above.)
+        *   A cloned network route will point to one of several possible
+        *   addresses if an interface has aliases and must be repointed
+        *   back to the correct address or arp_rtrequest() will not properly
+        *   detect the local ip.
         */
        if (rt->rt_flags & RTF_HOST) {
                if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
                        rt->rt_flags |= RTF_BROADCAST;
+               } else if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr ==
+                          sin->sin_addr.s_addr) {
+                       rt->rt_flags |= RTF_LOCAL;
                } else {
-                       if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr
-                           == sin->sin_addr.s_addr)
-                               rt->rt_flags |= RTF_LOCAL;
+                       LIST_FOREACH(iac, INADDR_HASH(sin->sin_addr.s_addr),
+                                    ia_hash) {
+                               ia = iac->ia;
+                               if (sin->sin_addr.s_addr ==
+                                   ia->ia_addr.sin_addr.s_addr) {
+                                       rt->rt_flags |= RTF_LOCAL;
+                                       IFAREF(&ia->ia_ifa);
+                                       IFAFREE(rt->rt_ifa);
+                                       rt->rt_ifa = &ia->ia_ifa;
+                                       rt->rt_ifp = rt->rt_ifa->ifa_ifp;
+                                       break;
+                               }
+                       }
                }
        }
 
@@ -115,7 +133,7 @@ in_addroute(char *key, char *mask, struct radix_node_head *head,
                rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
 
        ret = rn_addroute(key, mask, head, treenodes);
-       if (ret == NULL && rt->rt_flags & RTF_HOST) {
+       if (ret == NULL && (rt->rt_flags & RTF_HOST)) {
                struct rtentry *oldrt;
 
                /*
@@ -126,8 +144,8 @@ in_addroute(char *key, char *mask, struct radix_node_head *head,
                oldrt = rtpurelookup((struct sockaddr *)sin);
                if (oldrt != NULL) {
                        --oldrt->rt_refcnt;
-                       if (oldrt->rt_flags & RTF_LLINFO &&
-                           oldrt->rt_flags & RTF_HOST &&
+                       if ((oldrt->rt_flags & RTF_LLINFO) &&
+                           (oldrt->rt_flags & RTF_HOST) &&
                            oldrt->rt_gateway &&
                            oldrt->rt_gateway->sa_family == AF_LINK) {
                                rtrequest(RTM_DELETE, rt_key(oldrt),
@@ -146,8 +164,9 @@ in_addroute(char *key, char *mask, struct radix_node_head *head,
         */
        if (ret != NULL &&
            (rt->rt_flags &
-            (RTF_MULTICAST | RTF_BROADCAST | RTF_WASCLONED)) == 0)
+            (RTF_MULTICAST | RTF_BROADCAST | RTF_WASCLONED)) == 0) {
                ipflow_flush_oncpu();
+       }
        return ret;
 }