Merge branches 'master' and 'suser_to_priv'
[dragonfly.git] / sys / netinet / in.c
index deb699d..8693d69 100644 (file)
@@ -32,7 +32,7 @@
  *
  *     @(#)in.c        8.4 (Berkeley) 1/9/95
  * $FreeBSD: src/sys/netinet/in.c,v 1.44.2.14 2002/11/08 00:45:50 suz Exp $
- * $DragonFly: src/sys/netinet/in.c,v 1.26 2008/03/07 11:34:20 sephe Exp $
+ * $DragonFly: src/sys/netinet/in.c,v 1.41 2008/08/17 05:20:10 sephe Exp $
  */
 
 #include "opt_bootp.h"
@@ -42,6 +42,7 @@
 #include <sys/sockio.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
+#include <sys/priv.h>
 #include <sys/msgport.h>
 #include <sys/socket.h>
 
@@ -68,13 +69,16 @@ static int in_lifaddr_ioctl (struct socket *, u_long, caddr_t,
        struct ifnet *, struct thread *);
 
 static void    in_socktrim (struct sockaddr_in *);
-static int     in_ifinit (struct ifnet *,
-           struct in_ifaddr *, struct sockaddr_in *, int);
+static int     in_ifinit(struct ifnet *, struct in_ifaddr *,
+                   const struct sockaddr_in *, int);
 
 static void    in_control_dispatch(struct netmsg *);
 static int     in_control_internal(u_long, caddr_t, struct ifnet *,
                    struct thread *);
 
+static int     in_addprefix(struct in_ifaddr *, int);
+static void    in_scrubprefix(struct in_ifaddr *);
+
 static int subnetsarelocal = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW,
        &subnetsarelocal, 0, "");
@@ -94,16 +98,23 @@ int
 in_localaddr(struct in_addr in)
 {
        u_long i = ntohl(in.s_addr);
+       struct in_ifaddr_container *iac;
        struct in_ifaddr *ia;
 
        if (subnetsarelocal) {
-               TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
+               TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
+                       ia = iac->ia;
+
                        if ((i & ia->ia_netmask) == ia->ia_net)
                                return (1);
+               }
        } else {
-               TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
+               TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
+                       ia = iac->ia;
+
                        if ((i & ia->ia_subnetmask) == ia->ia_subnet)
                                return (1);
+               }
        }
        return (0);
 }
@@ -220,9 +231,9 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
        switch (cmd) {
        case SIOCALIFADDR:
        case SIOCDLIFADDR:
-               if (td && (error = suser(td)) != 0)
+               if (td && (error = priv_check(td, PRIV_ROOT)) != 0)
                        return error;
-               /*fall through*/
+               /* FALLTHROUGH */
        case SIOCGLIFADDR:
                if (!ifp)
                        return EINVAL;
@@ -233,6 +244,9 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
                ("recursive SIOC%cLIFADDR!\n",
                 cmd == SIOCDLIFADDR ? 'D' : 'A'));
 
+       /*
+        * IFADDR alterations are serialized by netisr0
+        */
        switch (cmd) {
        case SIOCSIFDSTADDR:
        case SIOCSIFBRDADDR:
@@ -258,20 +272,194 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
        }
 }
 
+static void
+in_ialink_dispatch(struct netmsg *nmsg)
+{ /* netmsg handler: appends an address to the running CPU's in_ifaddrheads list */
+       struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
+       struct in_ifaddr *ia = lmsg->u.ms_resultp; /* address stored in the message by in_ialink() */
+       struct ifaddr_container *ifac;
+       struct in_ifaddr_container *iac;
+       int cpu = mycpuid;
+
+       crit_enter();
+
+       ifac = &ia->ia_ifa.ifa_containers[cpu]; /* this CPU's container for ia */
+       ASSERT_IFAC_VALID(ifac);
+       KASSERT((ifac->ifa_listmask & IFA_LIST_IN_IFADDRHEAD) == 0,
+               ("ia is on in_ifaddrheads\n")); /* linking twice on the same CPU is a bug */
+
+       ifac->ifa_listmask |= IFA_LIST_IN_IFADDRHEAD; /* record list membership for this CPU */
+       iac = &ifac->ifa_proto_u.u_in_ifac;
+       TAILQ_INSERT_TAIL(&in_ifaddrheads[cpu], iac, ia_link);
+
+       crit_exit();
+
+       ifa_forwardmsg(lmsg, cpu + 1); /* NOTE(review): presumably hands the message to the next CPU -- confirm in ifa_forwardmsg() */
+}
+
+static void
+in_iaunlink_dispatch(struct netmsg *nmsg)
+{ /* netmsg handler: removes an address from the running CPU's in_ifaddrheads list */
+       struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
+       struct in_ifaddr *ia = lmsg->u.ms_resultp; /* address stored in the message by in_iaunlink() */
+       struct ifaddr_container *ifac;
+       struct in_ifaddr_container *iac;
+       int cpu = mycpuid;
+
+       crit_enter();
+
+       ifac = &ia->ia_ifa.ifa_containers[cpu]; /* this CPU's container for ia */
+       ASSERT_IFAC_VALID(ifac);
+       KASSERT(ifac->ifa_listmask & IFA_LIST_IN_IFADDRHEAD,
+               ("ia is not on in_ifaddrheads\n")); /* only a linked address may be unlinked */
+
+       iac = &ifac->ifa_proto_u.u_in_ifac;
+       TAILQ_REMOVE(&in_ifaddrheads[cpu], iac, ia_link);
+       ifac->ifa_listmask &= ~IFA_LIST_IN_IFADDRHEAD; /* clear list membership for this CPU */
+
+       crit_exit();
+
+       ifa_forwardmsg(lmsg, cpu + 1); /* NOTE(review): presumably hands the message to the next CPU -- confirm in ifa_forwardmsg() */
+}
+
+static void
+in_iahashins_dispatch(struct netmsg *nmsg)
+{ /* netmsg handler: inserts the running CPU's container for an address into its INADDR_HASH bucket */
+       struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
+       struct in_ifaddr *ia = lmsg->u.ms_resultp; /* address stored in the message by in_iahash_insert() */
+       struct ifaddr_container *ifac;
+       struct in_ifaddr_container *iac;
+       int cpu = mycpuid;
+
+       crit_enter();
+
+       ifac = &ia->ia_ifa.ifa_containers[cpu]; /* this CPU's container for ia */
+       ASSERT_IFAC_VALID(ifac);
+       KASSERT((ifac->ifa_listmask & IFA_LIST_IN_IFADDRHASH) == 0,
+               ("ia is on in_ifaddrhashtbls\n")); /* hashing twice on the same CPU is a bug */
+
+       ifac->ifa_listmask |= IFA_LIST_IN_IFADDRHASH; /* record hash membership for this CPU */
+       iac = &ifac->ifa_proto_u.u_in_ifac;
+       LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
+                        iac, ia_hash); /* bucket is chosen from the address currently in ia_addr */
+
+       crit_exit();
+
+       ifa_forwardmsg(lmsg, cpu + 1); /* NOTE(review): presumably hands the message to the next CPU -- confirm in ifa_forwardmsg() */
+}
+
+static void
+in_iahashrem_dispatch(struct netmsg *nmsg)
+{ /* netmsg handler: removes the running CPU's container for an address from its hash chain */
+       struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
+       struct in_ifaddr *ia = lmsg->u.ms_resultp; /* address stored in the message by in_iahash_remove() */
+       struct ifaddr_container *ifac;
+       struct in_ifaddr_container *iac;
+       int cpu = mycpuid;
+
+       crit_enter();
+
+       ifac = &ia->ia_ifa.ifa_containers[cpu]; /* this CPU's container for ia */
+       ASSERT_IFAC_VALID(ifac);
+       KASSERT(ifac->ifa_listmask & IFA_LIST_IN_IFADDRHASH,
+               ("ia is not on in_ifaddrhashtbls\n")); /* only a hashed address may be removed */
+
+       iac = &ifac->ifa_proto_u.u_in_ifac;
+       LIST_REMOVE(iac, ia_hash);
+       ifac->ifa_listmask &= ~IFA_LIST_IN_IFADDRHASH; /* clear hash membership for this CPU */
+
+       crit_exit();
+
+       ifa_forwardmsg(lmsg, cpu + 1); /* NOTE(review): presumably hands the message to the next CPU -- confirm in ifa_forwardmsg() */
+}
+
+static void
+in_ialink(struct in_ifaddr *ia)
+{ /* links ia into the per-CPU in_ifaddrheads lists by sending a message handled by in_ialink_dispatch() */
+       struct netmsg nmsg; /* lives on this stack frame; NOTE(review): implies ifa_domsg() completes before returning -- confirm */
+       struct lwkt_msg *lmsg;
+
+       netmsg_init(&nmsg, &curthread->td_msgport, 0, in_ialink_dispatch);
+       lmsg = &nmsg.nm_lmsg;
+       lmsg->u.ms_resultp = ia; /* the handler reads the address back from ms_resultp */
+
+       ifa_domsg(lmsg, 0); /* NOTE(review): 0 is presumably the starting CPU -- confirm in ifa_domsg() */
+}
+
+void
+in_iaunlink(struct in_ifaddr *ia)
+{ /* unlinks ia from the per-CPU in_ifaddrheads lists by sending a message handled by in_iaunlink_dispatch() */
+       struct netmsg nmsg; /* lives on this stack frame; NOTE(review): implies ifa_domsg() completes before returning -- confirm */
+       struct lwkt_msg *lmsg;
+
+       netmsg_init(&nmsg, &curthread->td_msgport, 0, in_iaunlink_dispatch);
+       lmsg = &nmsg.nm_lmsg;
+       lmsg->u.ms_resultp = ia; /* the handler reads the address back from ms_resultp */
+
+       ifa_domsg(lmsg, 0); /* NOTE(review): 0 is presumably the starting CPU -- confirm in ifa_domsg() */
+}
+
+void
+in_iahash_insert(struct in_ifaddr *ia)
+{ /* inserts ia into the address hash by sending a message handled by in_iahashins_dispatch() */
+       struct netmsg nmsg; /* lives on this stack frame; NOTE(review): implies ifa_domsg() completes before returning -- confirm */
+       struct lwkt_msg *lmsg;
+
+       netmsg_init(&nmsg, &curthread->td_msgport, 0, in_iahashins_dispatch);
+       lmsg = &nmsg.nm_lmsg;
+       lmsg->u.ms_resultp = ia; /* the handler reads the address back from ms_resultp */
+
+       ifa_domsg(lmsg, 0); /* NOTE(review): 0 is presumably the starting CPU -- confirm in ifa_domsg() */
+}
+
+void
+in_iahash_remove(struct in_ifaddr *ia)
+{ /* removes ia from the address hash by sending a message handled by in_iahashrem_dispatch() */
+       struct netmsg nmsg; /* lives on this stack frame; NOTE(review): implies ifa_domsg() completes before returning -- confirm */
+       struct lwkt_msg *lmsg;
+
+       netmsg_init(&nmsg, &curthread->td_msgport, 0, in_iahashrem_dispatch);
+       lmsg = &nmsg.nm_lmsg;
+       lmsg->u.ms_resultp = ia; /* the handler reads the address back from ms_resultp */
+
+       ifa_domsg(lmsg, 0); /* NOTE(review): 0 is presumably the starting CPU -- confirm in ifa_domsg() */
+}
+
+static __inline struct in_ifaddr *
+in_ianext(struct in_ifaddr *oia)
+{ /* returns the address following oia on the running CPU's ia_link list, or NULL when oia is the last entry */
+       struct ifaddr_container *ifac;
+       struct in_ifaddr_container *iac;
+
+       ifac = &oia->ia_ifa.ifa_containers[mycpuid]; /* list linkage is kept in the per-CPU container */
+       ASSERT_IFAC_VALID(ifac);
+       KASSERT(ifac->ifa_listmask & IFA_LIST_IN_IFADDRHEAD,
+               ("ia is not on in_ifaddrheads\n")); /* stepping from an unlinked address is a bug */
+
+       iac = &ifac->ifa_proto_u.u_in_ifac;
+       iac = TAILQ_NEXT(iac, ia_link);
+       if (iac != NULL)
+               return iac->ia; /* map the container back to its in_ifaddr */
+       else
+               return NULL;
+}
+
 static int
 in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
                    struct thread *td)
 {
        struct ifreq *ifr = (struct ifreq *)data;
-       struct in_ifaddr *ia = 0, *iap;
+       struct in_ifaddr *ia = NULL;
        struct in_addr dst;
-       struct in_ifaddr *oia;
        struct in_aliasreq *ifra = (struct in_aliasreq *)data;
+       struct ifaddr_container *ifac;
+       struct in_ifaddr_container *iac;
        struct sockaddr_in oldaddr;
-       int hostIsNew, iaIsNew, maskIsNew;
+       int hostIsNew, iaIsNew, maskIsNew, ifpWasUp;
        int error = 0;
 
        iaIsNew = 0;
+       ifpWasUp = 0;
 
        /*
         * Find address for this interface, if it exists.
@@ -280,16 +468,18 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
         * the first one on the interface, if possible
         */
        if (ifp) {
+               struct in_ifaddr *iap;
+
                dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
-               LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash)
+               LIST_FOREACH(iac, INADDR_HASH(dst.s_addr), ia_hash) {
+                       iap = iac->ia;
                        if (iap->ia_ifp == ifp &&
                            iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
                                ia = iap;
                                break;
                        }
+               }
                if (ia == NULL) {
-                       struct ifaddr_container *ifac;
-
                        TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid],
                                      ifa_link) {
                                iap = ifatoia(ifac->ifa);
@@ -299,25 +489,27 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
                                }
                        }
                }
+
+               if (ifp->if_flags & IFF_UP)
+                       ifpWasUp = 1;
        }
 
        switch (cmd) {
-
        case SIOCAIFADDR:
        case SIOCDIFADDR:
                if (ifp == NULL)
                        return (EADDRNOTAVAIL);
                if (ifra->ifra_addr.sin_family == AF_INET) {
-                       for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
+                       while (ia != NULL) {
                                if (ia->ia_ifp == ifp  &&
                                    ia->ia_addr.sin_addr.s_addr ==
                                    ifra->ifra_addr.sin_addr.s_addr)
                                        break;
+                               ia = in_ianext(ia);
                        }
-                       if ((ifp->if_flags & IFF_POINTOPOINT)
-                           && (cmd == SIOCAIFADDR)
-                           && (ifra->ifra_dstaddr.sin_addr.s_addr
-                               == INADDR_ANY)) {
+                       if ((ifp->if_flags & IFF_POINTOPOINT) &&
+                           cmd == SIOCAIFADDR &&
+                           ifra->ifra_dstaddr.sin_addr.s_addr == INADDR_ANY) {
                                return EDESTADDRREQ;
                        }
                }
@@ -327,24 +519,40 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
        case SIOCSIFADDR:
        case SIOCSIFNETMASK:
        case SIOCSIFDSTADDR:
-               if (td && (error = suser(td)) != 0)
+               if (td && (error = priv_check(td, PRIV_ROOT)) != 0)
                        return error;
 
                if (ifp == NULL)
                        return (EADDRNOTAVAIL);
+
+               if (cmd == SIOCSIFDSTADDR &&
+                   (ifp->if_flags & IFF_POINTOPOINT) == 0)
+                       return (EINVAL);
+
                if (ia == NULL) {
                        struct ifaddr *ifa;
+                       int i;
 
                        ia = ifa_create(sizeof(*ia), M_WAITOK);
+                       ifa = &ia->ia_ifa;
+
+                       /*
+                        * Setup per-CPU information
+                        */
+                       for (i = 0; i < ncpus; ++i) {
+                               ifac = &ifa->ifa_containers[i];
+                               iac = &ifac->ifa_proto_u.u_in_ifac;
+                               iac->ia = ia;
+                               iac->ia_ifac = ifac;
+                       }
 
                        /*
                         * Protect from NETISR_IP traversing address list
                         * while we're modifying it.
                         */
                        crit_enter();
-                       
-                       TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_link);
-                       ifa = &ia->ia_ifa;
+
+                       in_ialink(ia);
                        ifa_iflink(ifa, ifp, 1);
 
                        ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
@@ -360,12 +568,13 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
                        if (!(ifp->if_flags & IFF_LOOPBACK))
                                in_interfaces++;
                        iaIsNew = 1;
+
                        crit_exit();
                }
                break;
 
        case SIOCSIFBRDADDR:
-               if (td && (error = suser(td)) != 0)
+               if (td && (error = priv_check(td, PRIV_ROOT)) != 0)
                        return error;
                /* FALLTHROUGH */
 
@@ -377,8 +586,8 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
                        return (EADDRNOTAVAIL);
                break;
        }
-       switch (cmd) {
 
+       switch (cmd) {
        case SIOCGIFADDR:
                *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
                return (0);
@@ -400,17 +609,19 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
                return (0);
 
        case SIOCSIFDSTADDR:
-               if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
-                       return (EINVAL);
+               KKASSERT(ifp->if_flags & IFF_POINTOPOINT);
+
                oldaddr = ia->ia_dstaddr;
                ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
-               lwkt_serialize_enter(ifp->if_serializer);
-               if (ifp->if_ioctl &&
-                   (error = ifp->if_ioctl(ifp, SIOCSIFDSTADDR, (caddr_t)ia,
-                                             td->td_proc->p_ucred))) {
-                       ia->ia_dstaddr = oldaddr;
+               if (ifp->if_ioctl != NULL) {
+                       lwkt_serialize_enter(ifp->if_serializer);
+                       error = ifp->if_ioctl(ifp, SIOCSIFDSTADDR, (caddr_t)ia,
+                                             td->td_proc->p_ucred);
                        lwkt_serialize_exit(ifp->if_serializer);
-                       return (error);
+                       if (error) {
+                               ia->ia_dstaddr = oldaddr;
+                               return (error);
+                       }
                }
                if (ia->ia_flags & IFA_ROUTE) {
                        ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
@@ -419,7 +630,6 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
                                        (struct sockaddr *)&ia->ia_dstaddr;
                        rtinit(&ia->ia_ifa, RTM_ADD, RTF_HOST | RTF_UP);
                }
-               lwkt_serialize_exit(ifp->if_serializer);
                return (0);
 
        case SIOCSIFBRDADDR:
@@ -430,11 +640,22 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
 
        case SIOCSIFADDR:
                error = in_ifinit(ifp, ia,
-                   (struct sockaddr_in *) &ifr->ifr_addr, 1);
+                   (const struct sockaddr_in *)&ifr->ifr_addr, 1);
                if (error != 0 && iaIsNew)
                        break;
-               if (error == 0)
-                       EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+               if (error == 0) {
+                       EVENTHANDLER_INVOKE(ifaddr_event, ifp,
+                       iaIsNew ? IFADDR_EVENT_ADD : IFADDR_EVENT_CHANGE,
+                       &ia->ia_ifa);
+               }
+               if (!ifpWasUp && (ifp->if_flags & IFF_UP)) {
+                       /*
+                        * Interface is brought up by in_ifinit()
+                        * (via ifp->if_ioctl).  We act as if the
+                        * interface got IFF_UP flag turned on.
+                        */
+                       if_up(ifp);
+               }
                return (0);
 
        case SIOCSIFNETMASK:
@@ -451,19 +672,20 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
                                ifra->ifra_addr = ia->ia_addr;
                                hostIsNew = 0;
                        } else if (ifra->ifra_addr.sin_addr.s_addr ==
-                                              ia->ia_addr.sin_addr.s_addr)
+                                  ia->ia_addr.sin_addr.s_addr) {
                                hostIsNew = 0;
+                       }
                }
                if (ifra->ifra_mask.sin_len) {
                        in_ifscrub(ifp, ia);
                        ia->ia_sockmask = ifra->ifra_mask;
                        ia->ia_sockmask.sin_family = AF_INET;
                        ia->ia_subnetmask =
-                            ntohl(ia->ia_sockmask.sin_addr.s_addr);
+                           ntohl(ia->ia_sockmask.sin_addr.s_addr);
                        maskIsNew = 1;
                }
                if ((ifp->if_flags & IFF_POINTOPOINT) &&
-                   (ifra->ifra_dstaddr.sin_family == AF_INET)) {
+                   ifra->ifra_dstaddr.sin_family == AF_INET) {
                        in_ifscrub(ifp, ia);
                        ia->ia_dstaddr = ifra->ifra_dstaddr;
                        maskIsNew  = 1; /* We lie; but the effect's the same */
@@ -476,10 +698,17 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
                        break;
 
                if ((ifp->if_flags & IFF_BROADCAST) &&
-                   (ifra->ifra_broadaddr.sin_family == AF_INET))
+                   ifra->ifra_broadaddr.sin_family == AF_INET)
                        ia->ia_broadaddr = ifra->ifra_broadaddr;
-               if (error == 0)
-                       EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+               if (error == 0) {
+                       EVENTHANDLER_INVOKE(ifaddr_event, ifp,
+                       iaIsNew ? IFADDR_EVENT_ADD : IFADDR_EVENT_CHANGE,
+                       &ia->ia_ifa);
+               }
+               if (!ifpWasUp && (ifp->if_flags & IFF_UP)) {
+                       /* See the comment in SIOCSIFADDR */
+                       if_up(ifp);
+               }
                return (error);
 
        case SIOCDIFADDR:
@@ -494,7 +723,8 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
                 * a routing process they will come back.
                 */
                in_ifadown(&ia->ia_ifa, 1);
-               EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+               EVENTHANDLER_INVOKE(ifaddr_event, ifp, IFADDR_EVENT_DELETE,
+                                   &ia->ia_ifa);
                error = 0;
                break;
 
@@ -507,19 +737,51 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp,
                return (error);
        }
 
+       KKASSERT(cmd == SIOCDIFADDR ||
+                ((cmd == SIOCAIFADDR || cmd == SIOCSIFADDR) && iaIsNew));
+
        ifa_ifunlink(&ia->ia_ifa, ifp);
+       in_iaunlink(ia);
 
-       /*
-        * Protect from NETISR_IP traversing address list while we're modifying
-        * it.
-        */
-       crit_enter();   /* XXX MP */
-       TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link);
-       LIST_REMOVE(ia, ia_hash);
-       crit_exit();    /* XXX MP */
+       if (cmd == SIOCDIFADDR) {
+               ifac = &ia->ia_ifa.ifa_containers[mycpuid];
+               if (ifac->ifa_listmask & IFA_LIST_IN_IFADDRHASH)
+                       in_iahash_remove(ia);
+       }
+#ifdef INVARIANTS
+       else {
+               /*
+                * If cmd is SIOCSIFADDR or SIOCAIFADDR, in_ifinit() has
+                * already taken care of the deletion from hash table
+                */
+               ifac = &ia->ia_ifa.ifa_containers[mycpuid];
+               KASSERT((ifac->ifa_listmask & IFA_LIST_IN_IFADDRHASH) == 0,
+                       ("SIOC%cIFADDR failed on new ia, "
+                        "but the new ia is still in hash table\n",
+                        cmd == SIOCSIFADDR ? 'S' : 'A'));
+       }
+#endif
 
        ifa_destroy(&ia->ia_ifa);
 
+       if ((cmd == SIOCAIFADDR || cmd == SIOCSIFADDR) &&
+           !ifpWasUp && (ifp->if_flags & IFF_UP)) {
+               /*
+                * Though the address assignment failed, the
+                * interface is brought up by in_ifinit()
+                * (via ifp->if_ioctl).  With the hope that
+                * the interface has some valid addresses, we
+                * act as if IFF_UP flag was just set on the
+                * interface.
+                *
+                * NOTE:
+                * This could only be done after the failed
+                * address is unlinked from the global address
+                * list.
+                */
+               if_up(ifp);
+       }
+
        return (error);
 }
 
@@ -706,16 +968,9 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
  * Delete any existing route for an interface.
  */
 void
-in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia)
+in_ifscrub(struct ifnet *ifp __unused, struct in_ifaddr *ia)
 {
-
-       if ((ia->ia_flags & IFA_ROUTE) == 0)
-               return;
-       if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
-               rtinit(&ia->ia_ifa, RTM_DELETE, RTF_HOST);
-       else
-               rtinit(&ia->ia_ifa, RTM_DELETE, 0);
-       ia->ia_flags &= ~IFA_ROUTE;
+       in_scrubprefix(ia);
 }
 
 /*
@@ -723,42 +978,52 @@ in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia)
  * and routing table entry.
  */
 static int
-in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, int scrub)
+in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia,
+         const struct sockaddr_in *sin, int scrub)
 {
        u_long i = ntohl(sin->sin_addr.s_addr);
        struct sockaddr_in oldaddr;
+       struct ifaddr_container *ifac;
        int flags = RTF_UP, error = 0;
+       int was_hash = 0;
 
-       lwkt_serialize_enter(ifp->if_serializer);
-
+       ifac = &ia->ia_ifa.ifa_containers[mycpuid];
        oldaddr = ia->ia_addr;
-       if (oldaddr.sin_family == AF_INET)
-               LIST_REMOVE(ia, ia_hash);
+
+       if (ifac->ifa_listmask & IFA_LIST_IN_IFADDRHASH) {
+               was_hash = 1;
+               in_iahash_remove(ia);
+       }
+
        ia->ia_addr = *sin;
        if (ia->ia_addr.sin_family == AF_INET)
-               LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
-                   ia, ia_hash);
+               in_iahash_insert(ia);
+
        /*
         * Give the interface a chance to initialize
         * if this is its first address,
         * and to validate the address if necessary.
         */
-       if (ifp->if_ioctl &&
-           (error = ifp->if_ioctl(ifp, SIOCSIFADDR, (caddr_t)ia, NULL))) {
+       if (ifp->if_ioctl != NULL) {
+               lwkt_serialize_enter(ifp->if_serializer);
+               error = ifp->if_ioctl(ifp, SIOCSIFADDR, (caddr_t)ia, NULL);
                lwkt_serialize_exit(ifp->if_serializer);
-               /* LIST_REMOVE(ia, ia_hash) is done in in_control */
-               ia->ia_addr = oldaddr;
-               if (ia->ia_addr.sin_family == AF_INET)
-                       LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
-                           ia, ia_hash);
-               return (error);
+               if (error)
+                       goto fail;
        }
-       lwkt_serialize_exit(ifp->if_serializer);
+
+       /*
+        * Delete old route, if requested.
+        */
        if (scrub) {
                ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
                in_ifscrub(ifp, ia);
                ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
        }
+
+       /*
+        * Calculate netmask/subnetmask.
+        */
        if (IN_CLASSA(i))
                ia->ia_netmask = IN_CLASSA_NET;
        else if (IN_CLASSB(i))
@@ -773,11 +1038,13 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, int
        if (ia->ia_subnetmask == 0) {
                ia->ia_subnetmask = ia->ia_netmask;
                ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
-       } else
+       } else {
                ia->ia_netmask &= ia->ia_subnetmask;
+       }
        ia->ia_net = i & ia->ia_netmask;
        ia->ia_subnet = i & ia->ia_subnetmask;
        in_socktrim(&ia->ia_sockmask);
+
        /*
         * Add route for the network.
         */
@@ -788,7 +1055,7 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, int
                ia->ia_netbroadcast.s_addr =
                        htonl(ia->ia_net | ~ ia->ia_netmask);
        } else if (ifp->if_flags & IFF_LOOPBACK) {
-               ia->ia_ifa.ifa_dstaddr = ia->ia_ifa.ifa_addr;
+               ia->ia_dstaddr = ia->ia_addr;
                flags |= RTF_HOST;
        } else if (ifp->if_flags & IFF_POINTOPOINT) {
                if (ia->ia_dstaddr.sin_family != AF_INET)
@@ -808,11 +1075,9 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, int
        if (ia->ia_addr.sin_addr.s_addr != INADDR_ANY ||
            ia->ia_netmask != IN_CLASSA_NET ||
            ia->ia_dstaddr.sin_addr.s_addr != htonl(IN_CLASSA_HOST)) {
-               if ((error = rtinit(&ia->ia_ifa, (int)RTM_ADD, flags)) != 0) {
-                       ia->ia_addr = oldaddr;
-                       return (error);
-               }
-               ia->ia_flags |= IFA_ROUTE;
+               error = in_addprefix(ia, flags);
+               if (error)
+                       goto fail;
        }
 
        /*
@@ -825,9 +1090,169 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, int
                addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
                in_addmulti(&addr, ifp);
        }
+       return (0);
+fail:
+       if (ifac->ifa_listmask & IFA_LIST_IN_IFADDRHASH)
+               in_iahash_remove(ia);
+
+       ia->ia_addr = oldaddr;
+       if (was_hash)
+               in_iahash_insert(ia);
        return (error);
 }
 
+#define rtinitflags(x) \
+       (((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) \
+        ? RTF_HOST : 0)
+
+/*
+ * Add a route to prefix ("connected route" in cisco terminology).
+ * Do nothing, if there are some interface addresses with the same
+ * prefix already.  This function assumes that the 'target' parent
+ * interface is UP.
+ *
+ * 'flags' is passed to rtinit(); when it contains RTF_HOST the prefix
+ * is the destination address of 'target', otherwise it is the masked
+ * local address.
+ *
+ * Returns 0 if another address already owns a matching prefix route,
+ * otherwise the result of rtinit().  IFA_ROUTE is set on 'target' only
+ * when rtinit() succeeds.
+ */
+static int
+in_addprefix(struct in_ifaddr *target, int flags)
+{
+       struct in_ifaddr_container *iac;
+       struct in_addr prefix, mask;
+       int error;
+
+       mask = target->ia_sockmask.sin_addr;
+       if (flags & RTF_HOST) {
+               prefix = target->ia_dstaddr.sin_addr;
+       } else {
+               prefix = target->ia_addr.sin_addr;
+               prefix.s_addr &= mask.s_addr;
+       }
+
+       TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
+               struct in_ifaddr *ia = iac->ia;
+               struct in_addr p;
+
+               /* Don't test against self */
+               if (ia == target)
+                       continue;
+
+               /* The tested address does not own a route entry */
+               if ((ia->ia_flags & IFA_ROUTE) == 0)
+                       continue;
+
+               /* Prefix test */
+               if (rtinitflags(ia)) {
+                       p = ia->ia_dstaddr.sin_addr;
+               } else {
+                       p = ia->ia_addr.sin_addr;
+                       p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
+               }
+               if (prefix.s_addr != p.s_addr)
+                       continue;
+
+               /*
+                * If the to-be-added address and the currently being
+                * tested address are not host addresses, we need to
+                * take subnetmask into consideration.
+                */
+               if (!(flags & RTF_HOST) && !rtinitflags(ia) &&
+                   mask.s_addr != ia->ia_sockmask.sin_addr.s_addr)
+                       continue;
+
+               /*
+                * If we got a matching prefix route inserted by other
+                * interface address, we don't need to bother.
+                */
+               return 0;
+       }
+
+       /*
+        * No one seems to have the prefix route; insert it.
+        */
+       error = rtinit(&target->ia_ifa, RTM_ADD, flags);
+       if (!error)
+               target->ia_flags |= IFA_ROUTE;
+       return error;
+}
+
+/*
+ * Remove a route to prefix ("connected route" in cisco terminology).
+ * Re-installs the route by using another interface address, if there's
+ * one with the same prefix (otherwise we lose the route mistakenly).
+ *
+ * Does nothing when 'target' does not own a route (IFA_ROUTE clear).
+ * Otherwise IFA_ROUTE is always cleared on 'target' before returning.
+ */
+static void
+in_scrubprefix(struct in_ifaddr *target)
+{
+       struct in_ifaddr_container *iac;
+       struct in_addr prefix, mask;
+       int error;
+
+       if ((target->ia_flags & IFA_ROUTE) == 0)
+               return;
+
+       mask = target->ia_sockmask.sin_addr;
+       if (rtinitflags(target)) {
+               prefix = target->ia_dstaddr.sin_addr;
+       } else {
+               prefix = target->ia_addr.sin_addr;
+               prefix.s_addr &= mask.s_addr;
+       }
+
+       TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
+               struct in_ifaddr *ia = iac->ia;
+               struct in_addr p;
+
+               /* Don't test against self */
+               if (ia == target)
+                       continue;
+
+               /* The tested address already owns a route entry */
+               if (ia->ia_flags & IFA_ROUTE)
+                       continue;
+
+               /*
+                * The prefix route of the tested address should
+                * never be installed if its parent interface is
+                * not UP yet.
+                */
+               if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
+                       continue;
+
+               /* Prefix test */
+               if (rtinitflags(ia)) {
+                       p = ia->ia_dstaddr.sin_addr;
+               } else {
+                       p = ia->ia_addr.sin_addr;
+                       p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
+               }
+               if (prefix.s_addr != p.s_addr)
+                       continue;
+
+               /*
+                * We don't need to test subnetmask here, as what we do
+                * in in_addprefix(), since if the tested address's
+                * parent interface is UP, the tested address should own
+                * a prefix route entry and we would never reach here.
+                */
+
+               /*
+                * If we got a matching prefix route, move IFA_ROUTE to him
+                */
+               rtinit(&target->ia_ifa, RTM_DELETE, rtinitflags(target));
+               target->ia_flags &= ~IFA_ROUTE;
+
+               error = rtinit(&ia->ia_ifa, RTM_ADD, rtinitflags(ia) | RTF_UP);
+               if (!error)
+                       ia->ia_flags |= IFA_ROUTE; /* on failure neither address is marked as owning the route */
+               return;
+       }
+
+       /*
+        * No candidates for this prefix route; just remove it.
+        */
+       rtinit(&target->ia_ifa, RTM_DELETE, rtinitflags(target)); /* return value is not checked */
+       target->ia_flags &= ~IFA_ROUTE;
+}
+
+#undef rtinitflags
 
 /*
  * Return 1 if the address might be a local broadcast address.