From b991d8c64bef36e86339b2950aba868d992ff2e5 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Wed, 4 Sep 2013 16:27:09 +0800 Subject: [PATCH 01/16] re: Add 8411/8168G/8168EP/8168GU/8411B support --- sys/dev/netif/re/if_re.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/sys/dev/netif/re/if_re.c b/sys/dev/netif/re/if_re.c index 652b449355..f07d22193c 100644 --- a/sys/dev/netif/re/if_re.c +++ b/sys/dev/netif/re/if_re.c @@ -249,6 +249,26 @@ static const struct re_hwrev re_hwrevs[] = { RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX }, + { RE_HWREV_8411, ETHERMTU, + RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | + RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX }, + + { RE_HWREV_8168G, ETHERMTU, + RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | + RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX }, + + { RE_HWREV_8168EP, ETHERMTU, + RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | + RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX }, + + { RE_HWREV_8168GU, ETHERMTU, + RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | + RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX }, + + { RE_HWREV_8411B, ETHERMTU, + RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | + RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX }, + { RE_HWREV_8100E, ETHERMTU, RE_C_HWCSUM | RE_C_FASTE }, @@ -1003,12 +1023,21 @@ re_probe(device_t dev) case RE_HWREV_8168F: case RE_HWREV_8111F: + case RE_HWREV_8168G: if (macmode == 0 || macmode == 0x100000) { sc->re_caps |= RE_C_EE_EADDR; sc->re_ee_eaddr = RE_EE_EADDR1; } break; + + case RE_HWREV_8411: + case RE_HWREV_8168EP: + case RE_HWREV_8168GU: + case RE_HWREV_8411B: + sc->re_caps |= RE_C_EE_EADDR; + sc->re_ee_eaddr = RE_EE_EADDR1; + break; } if (pci_is_pcie(dev)) sc->re_caps |= RE_C_PCIE; -- 2.41.0 From c29e94c004c19541efabb0a3dc8e66b021d91606 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Wed, 4 Sep 2013 16:30:29 +0800 Subject: [PATCH 02/16] em: Move 
max_frame_size from softc to HAL data struct It is needed to make I217 link status detection work --- sys/dev/netif/em/if_em.c | 21 ++++++++++++--------- sys/dev/netif/em/if_em.h | 1 - 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/sys/dev/netif/em/if_em.c b/sys/dev/netif/em/if_em.c index 1ad135d7a3..fe9a1d0fbc 100644 --- a/sys/dev/netif/em/if_em.c +++ b/sys/dev/netif/em/if_em.c @@ -573,7 +573,8 @@ em_attach(device_t dev) } /* Set the frame limits assuming standard ethernet sized frames. */ - adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN; + adapter->hw.mac.max_frame_size = + ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN; adapter->min_frame_size = ETH_ZLEN + ETHER_CRC_LEN; /* This controls when hardware reports transmit completion status. */ @@ -1099,7 +1100,7 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr) } ifp->if_mtu = ifr->ifr_mtu; - adapter->max_frame_size = + adapter->hw.mac.max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; if (ifp->if_flags & IFF_RUNNING) @@ -2332,7 +2333,7 @@ em_reset(struct adapter *adapter) switch (adapter->hw.mac.type) { case e1000_82547: case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */ - if (adapter->max_frame_size > 8192) + if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */ else pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */ @@ -2376,7 +2377,7 @@ em_reset(struct adapter *adapter) default: /* Devices before 82547 had a Packet Buffer of 64K. 
*/ - if (adapter->max_frame_size > 8192) + if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ @@ -2401,7 +2402,7 @@ em_reset(struct adapter *adapter) (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) << 10; adapter->hw.fc.high_water = rx_buffer_size - - roundup2(adapter->max_frame_size, 1024); + roundup2(adapter->hw.mac.max_frame_size, 1024); adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500; if (adapter->hw.mac.type == e1000_80003es2lan) @@ -3083,7 +3084,7 @@ em_newbuf(struct adapter *adapter, int i, int init) } m->m_len = m->m_pkthdr.len = MCLBYTES; - if (adapter->max_frame_size <= MCLBYTES - ETHER_ALIGN) + if (adapter->hw.mac.max_frame_size <= MCLBYTES - ETHER_ALIGN) m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_segment(adapter->rxtag, @@ -3410,11 +3411,12 @@ em_rxeof(struct adapter *adapter, int count) last_byte = *(mtod(mp, caddr_t) + desc_len - 1); if (TBI_ACCEPT(&adapter->hw, status, current_desc->errors, pkt_len, last_byte, - adapter->min_frame_size, adapter->max_frame_size)) { + adapter->min_frame_size, + adapter->hw.mac.max_frame_size)) { e1000_tbi_adjust_stats_82543(&adapter->hw, &adapter->stats, pkt_len, adapter->hw.mac.addr, - adapter->max_frame_size); + adapter->hw.mac.max_frame_size); if (len > 0) len--; } else { @@ -3483,7 +3485,8 @@ discard: mp->m_len = mp->m_pkthdr.len = MCLBYTES; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; - if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) + if (adapter->hw.mac.max_frame_size <= + (MCLBYTES - ETHER_ALIGN)) m_adj(mp, ETHER_ALIGN); #endif if (adapter->fmp != NULL) { diff --git a/sys/dev/netif/em/if_em.h b/sys/dev/netif/em/if_em.h index b4dff954c1..a453142ceb 100644 --- a/sys/dev/netif/em/if_em.h +++ b/sys/dev/netif/em/if_em.h @@ -279,7 +279,6 @@ struct adapter { struct callout timer; struct callout tx_fifo_timer; int if_flags; - int max_frame_size; int min_frame_size; /* WOL register 
value */ -- 2.41.0 From 4c04919c1ec0a026faa5a1f7416c08a4733e1862 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Wed, 4 Sep 2013 17:25:34 +0800 Subject: [PATCH 03/16] re: Add 8401/8402/8106 support - Fix setup for certain generation of 8105E - 8101, 8102 and 8105 all need to extract ethernet address from EEPROM --- sys/dev/netif/re/if_re.c | 56 +++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/sys/dev/netif/re/if_re.c b/sys/dev/netif/re/if_re.c index f07d22193c..b896bbeda8 100644 --- a/sys/dev/netif/re/if_re.c +++ b/sys/dev/netif/re/if_re.c @@ -287,6 +287,18 @@ static const struct re_hwrev re_hwrevs[] = { RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | RE_C_AUTOPAD | RE_C_STOP_RXTX | RE_C_FASTE }, + { RE_HWREV_8401E, ETHERMTU, + RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | RE_C_AUTOPAD | + RE_C_STOP_RXTX | RE_C_FASTE }, + + { RE_HWREV_8402, ETHERMTU, + RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | RE_C_AUTOPAD | + RE_C_STOP_RXTX | RE_C_FASTE }, + + { RE_HWREV_8106E, ETHERMTU, + RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | RE_C_AUTOPAD | + RE_C_STOP_RXTX | RE_C_FASTE }, + { RE_HWREV_NULL, 0, 0 } }; @@ -1013,12 +1025,44 @@ re_probe(device_t dev) * Apply chip property fixup */ switch (sc->re_hwrev) { - case RE_HWREV_8101E: - if (macmode == 0 || - macmode == 0x200000) { - sc->re_caps |= RE_C_EE_EADDR; - sc->re_ee_eaddr = RE_EE_EADDR0; + case RE_HWREV_8168GU: + if (vendor == PCI_VENDOR_REALTEK && + product == PCI_PRODUCT_REALTEK_RT8101E) { + /* 8106EUS */ + sc->re_caps = RE_C_HWCSUM | RE_C_MAC2 | + RE_C_PHYPMGT | RE_C_AUTOPAD | + RE_C_STOP_RXTX | RE_C_FASTE; + sc->re_maxmtu = ETHERMTU; + device_printf(dev, "8106EUS fixup\n"); + } else { + /* 8168GU */ + goto ee_eaddr1; + } + break; + + case RE_HWREV_8168E: + if (vendor == PCI_VENDOR_REALTEK && + product == PCI_PRODUCT_REALTEK_RT8101E) { + /* 8105E */ + sc->re_caps = RE_C_HWCSUM | RE_C_MAC2 | + RE_C_PHYPMGT | RE_C_AUTOPAD | + RE_C_STOP_RXTX | RE_C_FASTE; + sc->re_maxmtu = ETHERMTU; 
+ device_printf(dev, "8105E fixup\n"); + goto ee_eaddr0; } + /* 8168E */ + break; + + case RE_HWREV_8101E: + case RE_HWREV_8102E: + case RE_HWREV_8102EL: + case RE_HWREV_8401E: + case RE_HWREV_8105E: + case RE_HWREV_8106E: +ee_eaddr0: + sc->re_caps |= RE_C_EE_EADDR; + sc->re_ee_eaddr = RE_EE_EADDR0; break; case RE_HWREV_8168F: @@ -1033,8 +1077,8 @@ re_probe(device_t dev) case RE_HWREV_8411: case RE_HWREV_8168EP: - case RE_HWREV_8168GU: case RE_HWREV_8411B: +ee_eaddr1: sc->re_caps |= RE_C_EE_EADDR; sc->re_ee_eaddr = RE_EE_EADDR1; break; -- 2.41.0 From 1eb61d6bff840dc3e765e433d475f7908de1422a Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Wed, 4 Sep 2013 17:39:10 +0800 Subject: [PATCH 04/16] em.4: TSO is supported on all PCI-E chips; properly mention I217/I218 --- share/man/man4/em.4 | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/share/man/man4/em.4 b/share/man/man4/em.4 index 57e41131a2..dd3eaac38b 100644 --- a/share/man/man4/em.4 +++ b/share/man/man4/em.4 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD: src/share/man/man4/em.4,v 1.30 2008/10/06 21:55:53 simon Exp $ .\" -.Dd September 3, 2013 +.Dd September 4, 2013 .Dt EM 4 .Os .Sh NAME @@ -61,15 +61,11 @@ The .Nm driver provides support for PCI Gigabit Ethernet adapters based on the Intel 82540, 82541ER, 82541PI, 82542, 82543, 82544, 82545, 82546, -82546EB, 82546GB, 82547, 82571, 81572, 82573, and 82574 Ethernet -controller chips. +82546EB, 82546GB, 82547, 82571, 81572, 82573, 82574, I217 and I218 +Ethernet controller chips. The driver supports Transmit/Receive checksum offload and Jumbo Frames on all but 82542-based adapters. -Furthermore it supports TCP segmentation offload (TSO) on adapters -based on the 82571, 82572, 82573 and 82574 controller chips. -.\"For further hardware information, see the -.\".Pa README -.\"included with the driver. +Furthermore it supports TCP segmentation offload (TSO) on PCI-E adapters. 
.Pp The .Nm emx -- 2.41.0 From f97bcf5edfcccdf7a48e9a64c834b6955503301f Mon Sep 17 00:00:00 2001 From: Antonio Huete Jimenez Date: Wed, 4 Sep 2013 12:13:45 +0200 Subject: [PATCH 05/16] vkernel32 - Change VM_MAX_USER_ADDRESS to match the real kernel. Pointed-out-by: dillon --- sys/platform/vkernel/include/vmparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/platform/vkernel/include/vmparam.h b/sys/platform/vkernel/include/vmparam.h index fa63fe8022..33348f6698 100644 --- a/sys/platform/vkernel/include/vmparam.h +++ b/sys/platform/vkernel/include/vmparam.h @@ -76,7 +76,7 @@ #define KERNEL_KVA_SIZE 0x40000000 #define VM_MIN_USER_ADDRESS 0x00000000 -#define VM_MAX_USER_ADDRESS 0xBFC00000 /* XXX match to real kernel */ +#define VM_MAX_USER_ADDRESS 0x9FC00000 /* XXX match to real kernel */ #define USRSTACK VM_MAX_USER_ADDRESS -- 2.41.0 From 5339dfe4986a925a95b80dbf103e448cb44c3bf6 Mon Sep 17 00:00:00 2001 From: Antonio Huete Jimenez Date: Wed, 4 Sep 2013 12:22:27 +0200 Subject: [PATCH 06/16] tmpfs - Remove duplicated checks in tmpfs_chflags() * Do not duplicate the checks that vop_helper_setattr_flags() already performed. * On tmpfs_mount(), root node already has set SF_NOCACHE on its vnode. --- sys/vfs/tmpfs/tmpfs_subr.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/sys/vfs/tmpfs/tmpfs_subr.c b/sys/vfs/tmpfs/tmpfs_subr.c index 678fd1da3c..8310b88991 100644 --- a/sys/vfs/tmpfs/tmpfs_subr.c +++ b/sys/vfs/tmpfs/tmpfs_subr.c @@ -1032,29 +1032,10 @@ tmpfs_chflags(struct vnode *vp, int vaflags, struct ucred *cred) return EROFS; error = vop_helper_setattr_flags(&flags, vaflags, node->tn_uid, cred); - /* - * Unprivileged processes are not permitted to unset system - * flags, or modify flags if any system flags are set. - * - * Silently enforce SF_NOCACHE on the root tmpfs vnode so - * tmpfs data is not double-cached by swapcache. 
- */ + /* Actually change the flags on the node itself */ if (error == 0) { TMPFS_NODE_LOCK(node); - if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { - if (vp->v_flag & VROOT) - flags |= SF_NOCACHE; - node->tn_flags = flags; - } else { - if (node->tn_flags & (SF_NOUNLINK | SF_IMMUTABLE | - SF_APPEND) || - (flags & UF_SETTABLE) != flags) { - error = EPERM; - } else { - node->tn_flags &= SF_SETTABLE; - node->tn_flags |= (flags & UF_SETTABLE); - } - } + node->tn_flags = flags; node->tn_status |= TMPFS_NODE_CHANGED; TMPFS_NODE_UNLOCK(node); } -- 2.41.0 From a5807b81a3e72af627e4d1050b80ac9391c33246 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Wed, 4 Sep 2013 20:36:11 +0800 Subject: [PATCH 07/16] emx: Add I217/I218 According to the datasheet, these two chips support 2 RX queues at least. --- sys/dev/netif/em/if_em.c | 8 ++-- sys/dev/netif/emx/if_emx.c | 88 +++++++++++++++++++++++++++++++++----- sys/dev/netif/emx/if_emx.h | 6 ++- 3 files changed, 85 insertions(+), 17 deletions(-) diff --git a/sys/dev/netif/em/if_em.c b/sys/dev/netif/em/if_em.c index fe9a1d0fbc..ac4ebb266a 100644 --- a/sys/dev/netif/em/if_em.c +++ b/sys/dev/netif/em/if_em.c @@ -233,10 +233,10 @@ static const struct em_vendor_info em_vendor_info_array[] = { EM_DEVICE(PCH2_LV_LM), EM_DEVICE(PCH2_LV_V), - EM_DEVICE(PCH_LPT_I217_LM), - EM_DEVICE(PCH_LPT_I217_V), - EM_DEVICE(PCH_LPTLP_I218_LM), - EM_DEVICE(PCH_LPTLP_I218_V), + EM_EMX_DEVICE(PCH_LPT_I217_LM), + EM_EMX_DEVICE(PCH_LPT_I217_V), + EM_EMX_DEVICE(PCH_LPTLP_I218_LM), + EM_EMX_DEVICE(PCH_LPTLP_I218_V), /* required last entry */ EM_DEVICE_NULL diff --git a/sys/dev/netif/emx/if_emx.c b/sys/dev/netif/emx/if_emx.c index bf3ecc9bcb..d5db862b86 100644 --- a/sys/dev/netif/emx/if_emx.c +++ b/sys/dev/netif/emx/if_emx.c @@ -161,6 +161,11 @@ static const struct emx_device { EMX_DEVICE(82574L), EMX_DEVICE(82574LA), + EMX_DEVICE(PCH_LPT_I217_LM), + EMX_DEVICE(PCH_LPT_I217_V), + EMX_DEVICE(PCH_LPTLP_I218_LM), + EMX_DEVICE(PCH_LPTLP_I218_V), + /* required 
last entry */ EMX_DEVICE_NULL }; @@ -554,6 +559,31 @@ emx_attach(device_t dev) sc->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); sc->hw.back = &sc->osdep; + /* + * For I217/I218, we need to map the flash memory and this + * must happen after the MAC is identified. + */ + if (sc->hw.mac.type == e1000_pch_lpt) { + sc->flash_rid = EMX_BAR_FLASH; + + sc->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &sc->flash_rid, RF_ACTIVE); + if (sc->flash == NULL) { + device_printf(dev, "Mapping of Flash failed\n"); + error = ENXIO; + goto fail; + } + sc->osdep.flash_bus_space_tag = rman_get_bustag(sc->flash); + sc->osdep.flash_bus_space_handle = + rman_get_bushandle(sc->flash); + + /* + * This is used in the shared code + * XXX this goof is actually not used. + */ + sc->hw.flash_address = (uint8_t *)sc->flash; + } + /* Do Shared Code initialization */ if (e1000_setup_init_funcs(&sc->hw, TRUE)) { device_printf(dev, "Setup of Shared code failed\n"); @@ -598,8 +628,7 @@ emx_attach(device_t dev) } /* Set the frame limits assuming standard ethernet sized frames. */ - sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN; - sc->min_frame_size = ETHER_MIN_LEN; + sc->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN; /* This controls when hardware reports transmit completion status. */ sc->hw.mac.report_tx_early = 1; @@ -611,6 +640,9 @@ emx_attach(device_t dev) /* * Calculate # of TX rings * + * XXX + * I217/I218 claims to have 2 TX queues + * * NOTE: * Don't enable multiple TX queues on 82574; it always gives * watchdog timeout on TX queue0, when multiple TCP streams are @@ -643,6 +675,9 @@ emx_attach(device_t dev) "PHY reset is blocked due to SOL/IDER session.\n"); } + /* Disable EEE on I217/I218 */ + sc->hw.dev_spec.ich8lan.eee_disable = 1; + /* * Start from a known state, this is important in reading the * nvm and mac from that. 
@@ -878,6 +913,11 @@ emx_detach(device_t dev) sc->memory); } + if (sc->flash != NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, sc->flash_rid, + sc->flash); + } + emx_dma_free(sc); /* Free sysctl tree */ @@ -1026,6 +1066,7 @@ emx_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr) case e1000_82571: case e1000_82572: case e1000_82574: + case e1000_pch_lpt: case e1000_80003es2lan: max_frame_size = 9234; break; @@ -1041,8 +1082,8 @@ emx_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr) } ifp->if_mtu = ifr->ifr_mtu; - sc->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + - ETHER_CRC_LEN; + sc->hw.mac.max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + + ETHER_CRC_LEN; if (ifp->if_flags & IFF_RUNNING) emx_init(sc); @@ -1870,9 +1911,13 @@ emx_reset(struct emx_softc *sc) pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; + case e1000_pch_lpt: + pba = E1000_PBA_26K; + break; + default: /* Devices before 82547 had a Packet Buffer of 64K. */ - if (sc->max_frame_size > 8192) + if (sc->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ @@ -1896,16 +1941,30 @@ emx_reset(struct emx_softc *sc) rx_buffer_size = (E1000_READ_REG(&sc->hw, E1000_PBA) & 0xffff) << 10; sc->hw.fc.high_water = rx_buffer_size - - roundup2(sc->max_frame_size, 1024); + roundup2(sc->hw.mac.max_frame_size, 1024); sc->hw.fc.low_water = sc->hw.fc.high_water - 1500; - if (sc->hw.mac.type == e1000_80003es2lan) - sc->hw.fc.pause_time = 0xFFFF; - else - sc->hw.fc.pause_time = EMX_FC_PAUSE_TIME; + sc->hw.fc.pause_time = EMX_FC_PAUSE_TIME; sc->hw.fc.send_xon = TRUE; sc->hw.fc.requested_mode = e1000_fc_full; + /* + * Device specific overrides/settings + */ + if (sc->hw.mac.type == e1000_pch_lpt) { + sc->hw.fc.high_water = 0x5C20; + sc->hw.fc.low_water = 0x5048; + sc->hw.fc.pause_time = 0x0650; + sc->hw.fc.refresh_time = 0x0400; + /* Jumbos need adjusted PBA */ + if (sc->arpcom.ac_if.if_mtu > 
ETHERMTU) + E1000_WRITE_REG(&sc->hw, E1000_PBA, 12); + else + E1000_WRITE_REG(&sc->hw, E1000_PBA, 26); + } else if (sc->hw.mac.type == e1000_80003es2lan) { + sc->hw.fc.pause_time = 0xFFFF; + } + /* Issue a global reset */ e1000_reset_hw(&sc->hw); E1000_WRITE_REG(&sc->hw, E1000_WUC, 0); @@ -2615,7 +2674,7 @@ emx_newbuf(struct emx_rxdata *rdata, int i, int init) } m->m_len = m->m_pkthdr.len = MCLBYTES; - if (rdata->sc->max_frame_size <= MCLBYTES - ETHER_ALIGN) + if (rdata->sc->hw.mac.max_frame_size <= MCLBYTES - ETHER_ALIGN) m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_segment(rdata->rxtag, @@ -2953,6 +3012,13 @@ emx_init_rx_unit(struct emx_softc *sc) sc->rx_data[i].num_rx_desc - 1); } + if (sc->hw.mac.type >= e1000_pch2lan) { + if (ifp->if_mtu > ETHERMTU) + e1000_lv_jumbo_workaround_ich8lan(&sc->hw, TRUE); + else + e1000_lv_jumbo_workaround_ich8lan(&sc->hw, FALSE); + } + /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | diff --git a/sys/dev/netif/emx/if_emx.h b/sys/dev/netif/emx/if_emx.h index 490762766d..1da9b4edd1 100644 --- a/sys/dev/netif/emx/if_emx.h +++ b/sys/dev/netif/emx/if_emx.h @@ -128,6 +128,7 @@ #define EMX_VENDOR_ID 0x8086 #define EMX_BAR_MEM PCIR_BAR(0) +#define EMX_BAR_FLASH PCIR_BAR(1) #define EMX_JUMBO_PBA 0x00000028 #define EMX_DEFAULT_PBA 0x00000030 @@ -352,6 +353,9 @@ struct emx_softc { struct resource *memory; int memory_rid; + struct resource *flash; + int flash_rid; + struct resource *intr_res; void *intr_tag; int intr_rid; @@ -360,8 +364,6 @@ struct emx_softc { struct ifmedia media; struct callout timer; int if_flags; - int max_frame_size; - int min_frame_size; /* WOL register value */ int wol; -- 2.41.0 From a1cbf0146a341ae0c37243324773e46f035fb702 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Wed, 4 Sep 2013 21:33:18 +0800 Subject: [PATCH 08/16] em.4: I217 and I218 are taken by emx --- share/man/man4/em.4 | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/share/man/man4/em.4 b/share/man/man4/em.4 index dd3eaac38b..e00b6fad8c 100644 --- a/share/man/man4/em.4 +++ b/share/man/man4/em.4 @@ -71,8 +71,8 @@ The .Nm emx is a version of the .Nm em -driver for 82571, 82572, 82573, and 82574 Ethernet controller chips that -additionally supports Receive Side Scaling (RSS, 2 reception queues). +driver for 82571, 82572, 82573, 82574, I217 and I218 Ethernet controller chips +that additionally supports Receive Side Scaling (RSS, 2 reception queues). By default, the .Nm emx -- 2.41.0 From 514735b13d6e0430400d51848a75c27a64f52178 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 4 Sep 2013 18:18:59 +0200 Subject: [PATCH 09/16] kernel/drm: Fix AGP detection for Matrox cards. Since DRM drivers attach to vgapci, we need to look at the grandparent. --- sys/dev/drm/mga/mga_drv.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sys/dev/drm/mga/mga_drv.c b/sys/dev/drm/mga/mga_drv.c index f0835a4263..60e1d0e61e 100644 --- a/sys/dev/drm/mga/mga_drv.c +++ b/sys/dev/drm/mga/mga_drv.c @@ -71,11 +71,7 @@ static int mga_driver_device_is_agp(struct drm_device * dev) * device is 0x0021 (HB6 Universal PCI-PCI bridge), we reject the * device. */ -#if __FreeBSD_version >= 700010 bus = device_get_parent(device_get_parent(dev->device)); -#else - bus = device_get_parent(dev->device); -#endif if (pci_get_device(dev->device) == 0x0525 && pci_get_vendor(bus) == 0x3388 && pci_get_device(bus) == 0x0021) -- 2.41.0 From 2a810c210b34f19e4102f4047516d132bdce66a7 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 4 Sep 2013 10:39:55 -0700 Subject: [PATCH 10/16] kernel - Implement IPV6 subnet routing / proxy ND6 (equiv to proxy ARP) * Do not require per-host RTF_ANNOUNCE/AF_LINK entries. They still work but they aren't needed any more (and they are such a huge bitch to set up anyway... best to avoid them). * Machine must have net.inet6.ip6.forwarding mode enabled. 
* Internet-facing interface must be in promiscuous mode. * Will automatically proxy ND6 any subnets if the interface is different from the one receiving the multicast. So e.g. you can route IPV6 which would otherwise have to be switched. The subnet interface must currently be different because if it were the same the solicitation would be directly received by the target host anyway (being a multicast) and we would compete with it. This is also a good safety. Example: ifconfig igb0 inet6 2999:499:1:555:1::72/80 For DNS ifconfig igb0 inet6 2999:499:1:555:1::1/80 For subnet default route ifconfig igb1 inet6 2999:499:1:555::2/80 For internet router ifconfig igb1 promisc route add -inet6 default 2999:499:1:555::1 The internet router is doing a terminal /64 block, e.g. its address is 2999:499:1:555::1/64, but we want to break the net up further and route portions of it instead of switch. --- sys/netinet6/ip6_input.c | 20 ++++------------ sys/netinet6/nd6_nbr.c | 52 ++++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 1e6d7cdf74..6051102c20 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -469,24 +469,14 @@ ip6_input(netmsg_t msg) /* * Multicast check + * + * WARNING! For general subnet proxying the interface hw will + * likely filter out the multicast solicitations, so + * the interface must be in promiscuous mode. */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { - struct in6_multi *in6m = NULL; - + ours = 1; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast); - /* - * See if we belong to the destination multicast group on the - * arrival interface. 
- */ - IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m); - if (in6m) - ours = 1; - else if (!ip6_mrouter) { - ip6stat.ip6s_notmember++; - ip6stat.ip6s_cantforward++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); - goto bad; - } deliverifp = m->m_pkthdr.rcvif; goto hbhcheck; } diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index db399149a2..d55ed7516c 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -213,8 +213,9 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) /* * Target address (taddr6) must be either: - * (1) Valid unicast/anycast address for my receiving interface, - * (2) Unicast address for which I'm offering proxy service, or + * (1) Valid unicast/anycast address for my receiving interface. + * (2) Unicast or anycast address for which I'm offering proxy + * service. * (3) "tentative" address on which DAD is being performed. */ /* (1) and (3) check. */ @@ -227,8 +228,28 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); #endif - /* (2) check. */ - if (!ifa) { + /* + * (2) Check proxying. Requires ip6_forwarding to be turned on. + * + * If the packet is anycast the target route must be on a + * different interface because the anycast will get anything + * on the current interface. + * + * If the packet is unicast the target route may be on the + * same interface. If the gateway is a (typically manually + * configured) link address we can directly offer it. + * XXX for now we don't do this but instead offer ours and + * presumably relay. + * + * WARNING! Since this is a subnet proxy the interface proxying + * the ND6 must be in promiscuous mode or it will not see the + * solicited multicast requests for various hosts being proxied. + * + * (In the specific-host-proxy case via RTF_ANNOUNCE, which is + * a bitch to configure, a specific multicast route is already + * added for that host <-- NOT RECOMMENDED). 
+ */ + if (!ifa && ip6_forwarding) { struct rtentry *rt; struct sockaddr_in6 tsin6; @@ -238,16 +259,27 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) tsin6.sin6_addr = taddr6; rt = rtpurelookup((struct sockaddr *)&tsin6); - if (rt != NULL && (rt->rt_flags & RTF_ANNOUNCE) && - rt->rt_gateway->sa_family == AF_LINK) { - /* - * proxy NDP for single entry - */ + if (rt != NULL && + (ifp != rt->rt_ifp || + (ifp == rt->rt_ifp && (m->m_flags & M_MCAST) == 0)) + ) { ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); + nd6log((LOG_INFO, + "nd6_ns_input: nd6 proxy %s<-%s ifa %p\n", + if_name(ifp), if_name(rt->rt_ifp), ifa)); if (ifa) { proxy = 1; - proxydl = SDL(rt->rt_gateway); + /* + * Manual link address on same interface + * w/announce flag will proxy-arp using + * target mac, else our mac is used. + */ + if (ifp == rt->rt_ifp && + (rt->rt_flags & RTF_ANNOUNCE) && + rt->rt_gateway->sa_family == AF_LINK) { + proxydl = SDL(rt->rt_gateway); + } } } if (rt != NULL) -- 2.41.0 From fa0b2ca96e94558b08064703c173255bf88d0b8e Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 4 Sep 2013 22:13:04 +0200 Subject: [PATCH 11/16] libsmb: Allow libsmb to detect if smbfs.ko is loaded. 
--- contrib/smbfs/lib/smb/subr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/contrib/smbfs/lib/smb/subr.c b/contrib/smbfs/lib/smb/subr.c index cd65315e59..b933b508ef 100644 --- a/contrib/smbfs/lib/smb/subr.c +++ b/contrib/smbfs/lib/smb/subr.c @@ -74,7 +74,6 @@ smb_lib_init(void) if (smblib_initialized) return 0; -#if __FreeBSD_version > 400000 error = sysctlbyname("net.smb.version", &kv, &kvlen, NULL, 0); if (error) { warnx("%s: can't find kernel module\n", __FUNCTION__); @@ -84,7 +83,6 @@ smb_lib_init(void) warnx("%s: kernel module version(%d) don't match library(%d).\n", __FUNCTION__, kv, NSMB_VERSION); return EINVAL; } -#endif if ((error = nls_setlocale("")) != 0) { warnx("%s: can't initialise locale\n", __FUNCTION__); return error; -- 2.41.0 From a3d0e4989603f4c1b2637b90c8d3bbbbac623163 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 4 Sep 2013 13:59:14 -0700 Subject: [PATCH 12/16] kernel - IPV6 subnet routing / proxy ND6 (bridge support) * Support IPV6 subnet routing and proxy ND6 through bridged interfaces. Essentially all the members of the bridge have to be treated as the bridge itself for comparison and MAC address handling. * Prevents the nd6 proxy code from treating two interfaces which are part of the same bridge as being different, which would trigger an improper proxy ND6. 
--- sys/netinet6/nd6_nbr.c | 47 +++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index d55ed7516c..49d80c2575 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -103,6 +103,7 @@ void nd6_ns_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ifnet *cmpifp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_neighbor_solicit *nd_ns; struct in6_addr saddr6 = ip6->ip6_src; @@ -117,6 +118,14 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) union nd_opts ndopts; struct sockaddr_dl *proxydl = NULL; + /* + * Collapse interfaces to the bridge for comparison and + * mac (llinfo) purposes. + */ + cmpifp = ifp; + if (ifp->if_bridge) + cmpifp = ifp->if_bridge; + #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off); @@ -154,8 +163,11 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) } else { /* * Make sure the source address is from a neighbor's address. + * + * XXX probably only need to check cmpifp. */ - if (in6ifa_ifplocaladdr(ifp, &saddr6) == NULL) { + if (in6ifa_ifplocaladdr(cmpifp, &saddr6) == NULL && + in6ifa_ifplocaladdr(ifp, &saddr6) == NULL) { nd6log((LOG_INFO, "nd6_ns_input: " "NS packet from non-neighbor\n")); goto bad; @@ -245,6 +257,10 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) * the ND6 must be in promiscuous mode or it will not see the * solicited multicast requests for various hosts being proxied. * + * WARNING! Since this is a subnet proxy we have to treat bridge + * interfaces as being the bridge itself so we do not proxy-nd6 + * between bridge interfaces (which are effectively switched). + * * (In the specific-host-proxy case via RTF_ANNOUNCE, which is * a bitch to configure, a specific multicast route is already * added for that host <-- NOT RECOMMENDED). 
@@ -252,6 +268,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) if (!ifa && ip6_forwarding) { struct rtentry *rt; struct sockaddr_in6 tsin6; + struct ifnet *rtifp; bzero(&tsin6, sizeof tsin6); tsin6.sin6_len = sizeof(struct sockaddr_in6); @@ -259,15 +276,20 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) tsin6.sin6_addr = taddr6; rt = rtpurelookup((struct sockaddr *)&tsin6); + rtifp = rt ? rt->rt_ifp : NULL; + if (rtifp && rtifp->if_bridge) + rtifp = rtifp->if_bridge; + if (rt != NULL && - (ifp != rt->rt_ifp || - (ifp == rt->rt_ifp && (m->m_flags & M_MCAST) == 0)) + (cmpifp != rtifp || + (cmpifp == rtifp && (m->m_flags & M_MCAST) == 0)) ) { - ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, + ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(cmpifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); nd6log((LOG_INFO, - "nd6_ns_input: nd6 proxy %s<-%s ifa %p\n", - if_name(ifp), if_name(rt->rt_ifp), ifa)); + "nd6_ns_input: nd6 proxy %s(%s)<-%s ifa %p\n", + if_name(cmpifp), if_name(ifp), + if_name(rtifp), ifa)); if (ifa) { proxy = 1; /* @@ -275,7 +297,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) * w/announce flag will proxy-arp using * target mac, else our mac is used. 
*/ - if (ifp == rt->rt_ifp && + if (cmpifp == rtifp && (rt->rt_flags & RTF_ANNOUNCE) && rt->rt_gateway->sa_family == AF_LINK) { proxydl = SDL(rt->rt_gateway); @@ -299,11 +321,11 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED) goto freeit; - if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { + if (lladdr && ((cmpifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s " "(if %d, NS packet %d)\n", - ip6_sprintf(&taddr6), ifp->if_addrlen, lladdrlen - 2)); + ip6_sprintf(&taddr6), cmpifp->if_addrlen, lladdrlen - 2)); goto bad; } @@ -350,8 +372,8 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) */ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { saddr6 = kin6addr_linklocal_allnodes; - saddr6.s6_addr16[1] = htons(ifp->if_index); - nd6_na_output(ifp, &saddr6, &taddr6, + saddr6.s6_addr16[1] = htons(cmpifp->if_index); + nd6_na_output(cmpifp, &saddr6, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), @@ -359,7 +381,8 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) goto freeit; } - nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_NEIGHBOR_SOLICIT, 0); + nd6_cache_lladdr(cmpifp, &saddr6, lladdr, + lladdrlen, ND_NEIGHBOR_SOLICIT, 0); nd6_na_output(ifp, &saddr6, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) -- 2.41.0 From e1c19d3abfe693cd96b285990b91986017b5aa2a Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Thu, 5 Sep 2013 00:14:37 +0200 Subject: [PATCH 13/16] de.4: Mention that Microsoft Virtual PC and Hyper-V have de(4) adapters. Therefore, we'd like to keep this driver. Hyper-V also comes with its specific virtual adapter, but we don't have support for that yet. 
--- share/man/man4/de.4 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/share/man/man4/de.4 b/share/man/man4/de.4 index 2467be706b..8cbeb82661 100644 --- a/share/man/man4/de.4 +++ b/share/man/man4/de.4 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD: src/share/man/man4/de.4,v 1.9.2.4 2001/08/17 13:08:37 ru Exp $ .\" -.Dd January 19, 1997 +.Dd September 5, 2013 .Dt DE 4 .Os .Sh NAME @@ -43,6 +43,8 @@ It supports the DEC PCI DE435 card, DEC DE450, DEC DE500, SMC 8432, 9332 and 9334, Cogent EM100FX and EM440TX, Asante, ZNYX ZX3xx, and others based on the 21040 and 21041 Ethernet controllers or the 21140[A], 21141, 21142 and 21143 Fast 100Mbps Ethernet controllers. +A DEC DC21x4x adapter is also found in Microsoft Virtual PC and (as the +legacy adapter) in Hyper-V. .Pp The .Nm -- 2.41.0 From f0bb593fb6e9d3ee1cc335b8073b092ac95848fb Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 4 Sep 2013 16:27:56 -0700 Subject: [PATCH 14/16] kernel - ipv6 - correct route table callout timer calculation * Correct the route table callout timer calculation which geometrically increases to the point where it isn't called any more. 
--- sys/netinet6/in6_rmx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index 613b5e9931..5e8d35cdf2 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -380,7 +380,12 @@ in6_rtqtimo(void *rock) } atv.tv_usec = 0; - atv.tv_sec = arg.nextstop; + atv.tv_sec = arg.nextstop - time_second; + if (atv.tv_sec < time_second) { + kprintf("invalid mtu expiration time on routing table\n"); + arg.nextstop = time_second + 30; /* last resort */ + atv.tv_sec = 30; + } callout_reset(&in6_rtqtimo_ch[mycpuid], tvtohz_high(&atv), in6_rtqtimo, rock); } @@ -431,10 +436,11 @@ in6_mtutimo(void *rock) crit_exit(); atv.tv_usec = 0; - atv.tv_sec = arg.nextstop; + atv.tv_sec = arg.nextstop - time_second; if (atv.tv_sec < time_second) { kprintf("invalid mtu expiration time on routing table\n"); arg.nextstop = time_second + 30; /* last resort */ + atv.tv_sec = 30; } callout_reset(&in6_mtutimo_ch[mycpuid], tvtohz_high(&atv), in6_mtutimo, rock); -- 2.41.0 From 509bc517a3efdc2540ce5ca92d2ef4afec4e8b6d Mon Sep 17 00:00:00 2001 From: Antonio Huete Jimenez Date: Wed, 22 Aug 2012 14:53:18 +0200 Subject: [PATCH 15/16] Bring in DIRFS: A filesystem for VKERNELS * What is DIRFS? dirfs is a pseudo-filesystem specific to vkernel(7) which allows mounting host's directories into the vkernel. It runs directly in the vkernel's VFS code, like any other regular filesystem, but it does syscalls (vkernels are userland programs) to retrieve or post the information needed on every operation requested. Needless to say, the operations that you can perform on the host directories/files depend on the permissions of the user that runs the vkernel. For example, you will not be able to 'chflags schg' if you run the vkernel as a regular user and not as root. * How does it work? It basically works like any other filesystem. It has its own mount_dirfs command that will be called by the system's mount(8) command when needed.
vkernel64 # mount -t dirfs /usr/src2 /mnt vkernel64 # df -h /mnt Filesystem Size Used Avail Capacity Mounted on dirfs@/usr/src2 47G 36G 12G 75% /mnt Unmounting is a normal operation too: vkernel64 # mount | fgrep dirfs dirfs@/usr/src2 on /mnt (dirfs) vkernel64 # umount /mnt * What's the current status? It is currently in an *experimental* state, with (probably) many bugs and some parts missing. TODO - Make dirfs mpsafe. - Fix problems with multiple mount points. - Implement VOP_NLINK so that hardlinks are possible. - Add missing kqueue(2) support. - dirfs root so that a vkernel can be booted from it. - Locking mechanisms for opened fds between host <-> vkernel. - Make sure dirfs is properly restored after vkernel checkpointing (upcoming GSoC project). - Bug hunting & bug fixing. - Any ideas? --- sbin/Makefile | 1 + sbin/mount_dirfs/Makefile | 10 + sbin/mount_dirfs/mount_dirfs.8 | 66 ++ sbin/mount_dirfs/mount_dirfs.c | 159 +++ share/man/man5/Makefile | 1 + share/man/man5/dirfs.5 | 103 ++ sys/platform/vkernel/conf/files | 3 + sys/platform/vkernel/conf/options | 6 + sys/platform/vkernel64/conf/files | 3 + sys/platform/vkernel64/conf/options | 6 + sys/vfs/dirfs/dirfs.h | 266 +++++ sys/vfs/dirfs/dirfs_subr.c | 891 ++++++++++++++++ sys/vfs/dirfs/dirfs_vfsops.c | 322 ++++++ sys/vfs/dirfs/dirfs_vnops.c | 1500 +++++++++++++++++++++++++++ 14 files changed, 3337 insertions(+) create mode 100644 sbin/mount_dirfs/Makefile create mode 100644 sbin/mount_dirfs/mount_dirfs.8 create mode 100644 sbin/mount_dirfs/mount_dirfs.c create mode 100644 share/man/man5/dirfs.5 create mode 100644 sys/vfs/dirfs/dirfs.h create mode 100644 sys/vfs/dirfs/dirfs_subr.c create mode 100644 sys/vfs/dirfs/dirfs_vfsops.c create mode 100644 sys/vfs/dirfs/dirfs_vnops.c diff --git a/sbin/Makefile b/sbin/Makefile index c8663fca1d..0c52329e2c 100644 --- a/sbin/Makefile +++ b/sbin/Makefile @@ -50,6 +50,7 @@ SUBDIR= adjkerntz \ mount_ufs \ mount_cd9660 \ mount_devfs \ + mount_dirfs \ mount_ext2fs \ mount_hammer \
mount_hpfs \ diff --git a/sbin/mount_dirfs/Makefile b/sbin/mount_dirfs/Makefile new file mode 100644 index 0000000000..2e289de772 --- /dev/null +++ b/sbin/mount_dirfs/Makefile @@ -0,0 +1,10 @@ +PROG= mount_dirfs +SRCS= mount_dirfs.c +MAN= mount_dirfs.8 + +LDADD= -lutil +DPADD= ${LIBUTIL} + +#CFLAGS+= -I${.CURDIR}/../../sys + +.include diff --git a/sbin/mount_dirfs/mount_dirfs.8 b/sbin/mount_dirfs/mount_dirfs.8 new file mode 100644 index 0000000000..5e5039017b --- /dev/null +++ b/sbin/mount_dirfs/mount_dirfs.8 @@ -0,0 +1,66 @@ +.\" +.\" Copyright (c) 2013 Antonio Huete Jimenez +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. 
+.\" +.Dd September 5, 2013 +.Dt MOUNT_DIRFS 8 +.Os +.Sh NAME +.Nm mount_dirfs +.Nd mount a host directory inside a vkernel +.Sh SYNOPSIS +.Nm +.Op Fl o Ar options +.Ar hostdir +.Ar mount_point +.Sh DESCRIPTION +The +.Nm +utility allows +.Xr vkernel 7 +to access host directories with minimal configuration. +.Pp +The following options are supported: +.Bl -tag -width XoXoptions +.It Fl o Ar options +Options are specified with a +.Fl o +flag followed by a comma-separated string of options. +See the +.Xr mount 8 +and +.Xr dirfs 5 +manual pages for possible options and their meanings. +.El +.Sh EXAMPLES +The command below mounts the host directory +.Pa /usr/src +on the vkernel's directory +.Pa /mnt : +.Pp +.Ic "mount -t dirfs /usr/src /mnt" +.Sh SEE ALSO +.Xr fstab 5 , +.Xr dirfs 5 , +.Xr mount 8 diff --git a/sbin/mount_dirfs/mount_dirfs.c b/sbin/mount_dirfs/mount_dirfs.c new file mode 100644 index 0000000000..65dd41e1c4 --- /dev/null +++ b/sbin/mount_dirfs/mount_dirfs.c @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2013 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Antonio Huete Jimenez + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MOPT_UPDATE { "update", 0, MNT_UPDATE, 0 } +#define PLATFORM_LEN 16 + +static struct mntopt mopts[] = { MOPT_STDOPTS, MOPT_UPDATE, MOPT_NULL }; + +static void usage(void); + +int +main(int ac, char **av) +{ + struct vfsconf vfc; + int mount_flags = 0; + int error; + int ch; + int init_flags = 0; + char *mountpt, *hostdir; + size_t vsize; + char platform[PLATFORM_LEN] = {0}; + + mount_flags = 0; + + while ((ch = getopt(ac, av, "o:u")) != -1) { + switch(ch) { + case 'u': + init_flags |= MNT_UPDATE; + break; + + case 'o': + getmntopts(optarg, mopts, &mount_flags, NULL); + break; + default: + usage(); + /* not reached */ + } + } + ac -= optind; + av += optind; + mount_flags |= init_flags; + + /* + * Check we're in a vkernel or abort. 
+ */ + vsize = PLATFORM_LEN; + error = sysctlbyname("hw.platform", &platform, &vsize, NULL,0); + if (error) + errx(1, "Failed to get hw.platform sysctl"); + + if (strnstr(platform, "vkernel", PLATFORM_LEN) == NULL) + errx(1, "dirfs is only available for vkernels."); + + /* + * Only the mount point need be specified in update mode. + */ + if (init_flags & MNT_UPDATE) { + if (ac != 1) { + usage(); + /* not reached */ + } + mountpt = av[0]; + if (mount(vfc.vfc_name, mountpt, mount_flags, NULL)) + err(1, "mountpoint %s", mountpt); + exit(0); + } + + if (ac < 2) { + usage(); + /* not reached */ + } + + hostdir = av[0]; + mountpt = av[1]; + + /* + * Load the dirfs module if necessary (this bit stolen from + * mount_null). + */ + error = getvfsbyname("dirfs", &vfc); + if (error && vfsisloadable("dirfs")) { + if (vfsload("dirfs") != 0) + err(1, "vfsload(dirfs)"); + endvfsent(); + error = getvfsbyname("dirfs", &vfc); + } + if (error) + errx(1, "dirfs filesystem is not available"); + + error = mount(vfc.vfc_name, mountpt, mount_flags, hostdir); + if (error) + err(1, "failed to mount %s on %s", hostdir, mountpt); + + exit (0); +} + +static +void +usage(void) +{ + fprintf(stderr, "usage: mount_dirfs [-u] [-o options] " + "hostdir dir\n"); + exit(1); +} diff --git a/share/man/man5/Makefile b/share/man/man5/Makefile index 9e022ed929..cc25f0565d 100644 --- a/share/man/man5/Makefile +++ b/share/man/man5/Makefile @@ -10,6 +10,7 @@ MAN= acct.5 \ devfs.5 \ devtab.5 \ dir.5 \ + dirfs.5 \ disktab.5 \ elf.5 \ ethers.5 \ diff --git a/share/man/man5/dirfs.5 b/share/man/man5/dirfs.5 new file mode 100644 index 0000000000..8f6b626f65 --- /dev/null +++ b/share/man/man5/dirfs.5 @@ -0,0 +1,103 @@ +.\" +.\" Copyright (c) 2013 Antonio Huete Jimenez +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. The name of the author may not be used to endorse or promote products +.\" derived from this software without specific prior written permission +.\" +.\" THIS DOCUMENTATION IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.\" +.Dd September 5, 2013 +.Dt DIRFS 5 +.Os +.Sh NAME +.Nm dirfs +.Nd "pseudo-filesystem for vkernel" +.Sh SYNOPSIS +To compile this driver into the vkernel, +place the following line in your +vkernel configuration file: +.Bd -ragged -offset indent +.Cd "options DIRFS" +.Ed +.Pp +Actually this driver does not provide a loadable module. +.Pp +In +.Xr fstab 5 : +.Bd -literal -compact +/usr/src /mnt dirfs rw 0 0 +.Sh DESCRIPTION +.Nm +was born from the idea of providing an easy way for +.Xr vkernel 7 +to access host's directories without any sort of configuration as it would be +needed by NFS for example. 
+.Pp +It runs directly in the vkernel's +.Xr VFS 9 +code, like any other regular filesystem, but it uses syscalls to retrieve the +information needed for every operation requested. +.Pp +It should be noted that when the vkernel is run by a regular user, the +operations +.Nm +can perform on the mounted host directory are bound to the permissions of +the aforementioned user. +.Pp +Multiple +.Nm +mounts are allowed. +.Sh EXAMPLES +To mount a +.Nm +file system: +.Pp +.Dl "mount -t dirfs /usr/src /mnt" +.Sh SEE ALSO +.Xr fstab 5 , +.Xr mount_dirfs 8 +.Sh HISTORY +The +.Nm +driver first appeared in +.Dx 3.5 . +.Sh AUTHORS +.An -nosplit +The +.Nm +vkernel implementation was written from scratch by +.An Antonio Huete Jimenez Aq Mt tuxillo@quantumachine.net +.Pp +Numerous fixes and pointers by +.An Matthew Dillon Aq Mt dillon@apollo.backplane.com +.Pp +This manual page was written by +.An Antonio Huete Jimenez Aq Mt tuxillo@quantumachine.net +.Sh BUGS +Currently there is no locking on file descriptors between the host +and the vkernel. +This means that there might be problems with concurrent accesses to the same +file. +.Pp +There is no support for hardlinks in +.Nm +yet.
diff --git a/sys/platform/vkernel/conf/files b/sys/platform/vkernel/conf/files index 28f42b4a27..4d3db4cf98 100644 --- a/sys/platform/vkernel/conf/files +++ b/sys/platform/vkernel/conf/files @@ -54,6 +54,9 @@ kern/subr_diskgpt.c standard dev/virtual/vkernel/cdrom/vcd.c optional vcd dev/virtual/vkernel/disk/vdisk.c optional vkd dev/virtual/vkernel/net/if_vke.c optional vke +vfs/dirfs/dirfs_vnops.c optional dirfs +vfs/dirfs/dirfs_vfsops.c optional dirfs +vfs/dirfs/dirfs_subr.c optional dirfs # PLATFORM FILES # diff --git a/sys/platform/vkernel/conf/options b/sys/platform/vkernel/conf/options index e1085b170f..77a7d72db3 100644 --- a/sys/platform/vkernel/conf/options +++ b/sys/platform/vkernel/conf/options @@ -4,3 +4,9 @@ I586_CPU opt_global.h I686_CPU opt_global.h COMPAT_DF12 opt_compatdf12.h + +# Static filesystems +DIRFS opt_dontuse.h + +# KTR options +KTR_DIRFS opt_ktr.h diff --git a/sys/platform/vkernel64/conf/files b/sys/platform/vkernel64/conf/files index 65d75933fd..ba1417a3c4 100644 --- a/sys/platform/vkernel64/conf/files +++ b/sys/platform/vkernel64/conf/files @@ -44,6 +44,9 @@ kern/subr_diskgpt.c standard dev/virtual/vkernel/cdrom/vcd.c optional vcd dev/virtual/vkernel/disk/vdisk.c optional vkd dev/virtual/vkernel/net/if_vke.c optional vke +vfs/dirfs/dirfs_vnops.c optional dirfs +vfs/dirfs/dirfs_vfsops.c optional dirfs +vfs/dirfs/dirfs_subr.c optional dirfs # PLATFORM FILES # diff --git a/sys/platform/vkernel64/conf/options b/sys/platform/vkernel64/conf/options index b9335f8e6e..223439f38f 100644 --- a/sys/platform/vkernel64/conf/options +++ b/sys/platform/vkernel64/conf/options @@ -1,3 +1,9 @@ # The cpu type # HAMMER_CPU opt_global.h + +# Static filesystems +DIRFS opt_dontuse.h + +# KTR options +KTR_DIRFS opt_ktr.h \ No newline at end of file diff --git a/sys/vfs/dirfs/dirfs.h b/sys/vfs/dirfs/dirfs.h new file mode 100644 index 0000000000..6a41329054 --- /dev/null +++ b/sys/vfs/dirfs/dirfs.h @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2013 The DragonFly 
Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Antonio Huete Jimenez + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#ifndef _SYS_VFS_DIRFS_DIRFS_H_ +#define _SYS_VFS_DIRFS_DIRFS_H_ + +#include + +#include +#include +#include + +MALLOC_DECLARE(M_DIRFS); +MALLOC_DECLARE(M_DIRFS_NODE); +MALLOC_DECLARE(M_DIRFS_MISC); + +#ifndef KTR_DIRFS +#define KTR_DIRFS KTR_ALL +#endif + +#define DIRFS_NOFD -1 /* No fd present */ + +#define DIRFS_ROOT 0x00000001 +#define DIRFS_PASVFD 0x00000002 + +#define DIRFS_TXTFLG "pasvfd" + +/* Used for buffer cache operations */ +#define BSIZE 16384 +#define BMASK (BSIZE - 1) + +/* + * XXX This should be temporary. A semi-proper solution would be to expose + * below prototypes in the _KERNEL_VIRTUAL case. + */ +extern int getdirentries(int, char *, int, long *); +extern int statfs(const char *, struct statfs *); + +/* + * Debugging macros. The impact should be determined and in case it has a + * considerable performance penalty, it should be enclosed in a DEBUG #ifdef. + */ +#define debug_called() do { \ + dbg(9, "called\n", __func__); \ +} while(0) + +#define dbg(lvl, fmt, ...) 
do { \ + debug(lvl, "%s: " fmt, __func__, ##__VA_ARGS__); \ +} while(0) + +#define debug_node(s) do { \ + dbg(5, "mode=%u flags=%u dn_name=%s " \ + "uid=%u gid=%u objtype=%u nlinks=%d " \ + "size=%jd ctime=%ju atime=%ju mtime=%ju\n", \ + s->dn_mode, s->dn_flags, s->dn_name, \ + s->dn_uid, s->dn_gid, s->dn_type, \ + s->dn_links, s->dn_size, \ + s->dn_ctime, s->dn_atime, \ + s->dn_mtime); \ +} while(0) + +#define debug_node2(n) do { \ + dbg(5, "dnp=%p name=%s fd=%d parent=%p vnode=%p " \ + "refcnt=%d state=%s\n", \ + n, n->dn_name, n->dn_fd, n->dn_parent, n->dn_vnode, \ + n->dn_refcnt, dirfs_flag2str(n)); \ +} while(0) + +/* + * Locking macros + */ +#define dirfs_node_islocked(n) (lockstatus(&(n)->dn_lock,curthread) == LK_EXCLUSIVE) +#define dirfs_node_lock(n) lockmgr(&(n)->dn_lock, LK_EXCLUSIVE|LK_RETRY) +#define dirfs_node_unlock(n) lockmgr(&(n)->dn_lock, LK_RELEASE) +#define dirfs_mount_lock(m) lockmgr(&(m)->dm_lock, LK_EXCLUSIVE|LK_RETRY) +#define dirfs_mount_unlock(m) lockmgr(&(m)->dm_lock, LK_RELEASE) +#define dirfs_mount_gettoken(m) lwkt_gettoken(&(m)->dm_token) +#define dirfs_mount_reltoken(m) lwkt_reltoken(&(m)->dm_token) + +#define dirfs_node_isroot(n) (n->dn_state & DIRFS_ROOT) + +/* + * Main in-memory node structure which will represent a host file when active. + * Upon VOP_NRESOLVE() an attempt to initialize its generic fields will be made + * via a fstatat(2)/lstat(2) call. + */ +struct dirfs_node { + enum vtype dn_type; /* Node type. Same as vnode + type for simplicty */ + + int dn_state; /* Node state flags */ + + TAILQ_ENTRY(dirfs_node) dn_fdentry; /* Passive fd cache */ + RB_ENTRY(dirfs_node) dn_rbentry; /* Inode no. lookup */ + + int dn_refcnt; /* Refs from children */ + int dn_fd; /* File des. 
for open(2) */ + + struct dirfs_node * dn_parent; /* Pointer to parent node */ + + struct vnode * dn_vnode; /* Reference to its vnode on + the vkernel scope */ + char * dn_name; + int dn_namelen; + + struct lockf dn_advlock; + struct lock dn_lock; + + uint32_t dn_st_dev; /* Device number */ + + /* Generic attributes */ + ino_t dn_ino; + long dn_blocksize; + uid_t dn_uid; + gid_t dn_gid; + mode_t dn_mode; + int dn_flags; + nlink_t dn_links; + int32_t dn_atime; + int32_t dn_atimensec; + int32_t dn_mtime; + int32_t dn_mtimensec; + int32_t dn_ctime; + int32_t dn_ctimensec; + unsigned long dn_gen; + off_t dn_size; +}; +typedef struct dirfs_node *dirfs_node_t; + +/* + * In-memory dirfs mount structure. It corresponds to a mounted + * dirfs filesystem. + */ +struct dirfs_mount { + RB_HEAD(, dn_rbentry) dm_inotree; + TAILQ_HEAD(, dirfs_node) dm_fdlist; + + struct lock dm_lock; + struct lwkt_token dm_token; + dirfs_node_t dm_root; /* Root dirfs node */ + struct mount * dm_mount; + int dm_rdonly; + + int dm_fd_used; /* Opened file descriptors */ + + char dm_path[MAXPATHLEN]; +}; +typedef struct dirfs_mount *dirfs_mount_t; + +/* + * VFS <-> DIRFS conversion macros + */ +#define VFS_TO_DIRFS(mp) ((dirfs_mount_t)((mp)->mnt_data)) +#define DIRFS_TO_VFS(dmp) ((struct mount *)((dmp)->dm_mount)) +#define VP_TO_NODE(vp) ((dirfs_node_t)((vp)->v_data)) +#define NODE_TO_VP(dnp) ((dnp)->dn_vnode) + +/* Misc stuff */ +extern int debuglvl; +extern int dirfs_fd_limit; +extern int dirfs_fd_used; +extern long passive_fd_list_miss; +extern long passive_fd_list_hits; + +extern struct vop_ops dirfs_vnode_vops; + +/* + * Misc functions for node flags and reference count + */ +static __inline void +dirfs_node_ref(dirfs_node_t dnp) +{ + atomic_add_int(&dnp->dn_refcnt, 1); +} + +static __inline int +dirfs_node_unref(dirfs_node_t dnp) +{ + /* + * Returns non-zero on last unref. 
+ */ + KKASSERT(dnp->dn_refcnt > 0); + return (atomic_fetchadd_int(&dnp->dn_refcnt, -1) == 1); +} + +static __inline void +dirfs_node_setflags(dirfs_node_t dnp, int flags) +{ + atomic_set_int(&dnp->dn_state, flags); +} + +static __inline void +dirfs_node_clrflags(dirfs_node_t dnp, int flags) +{ + atomic_clear_int(&dnp->dn_state, flags); +} + + +/* + * Prototypes + */ +dirfs_node_t dirfs_node_alloc(struct mount *); +int dirfs_node_stat(int, const char *, dirfs_node_t); +int dirfs_nodetype(struct stat *); +void dirfs_node_setname(dirfs_node_t, const char *, int); +char *dirfs_node_fullpath(dirfs_mount_t, const char *); +int dirfs_node_free(dirfs_mount_t, dirfs_node_t); +void dirfs_node_drop(dirfs_mount_t dmp, dirfs_node_t dnp); +void dirfs_node_setpassive(dirfs_mount_t dmp, dirfs_node_t dnp, int state); +void dirfs_alloc_vp(struct mount *, struct vnode **, int, dirfs_node_t); +void dirfs_free_vp(dirfs_mount_t, dirfs_node_t); +int dirfs_alloc_file(dirfs_mount_t, dirfs_node_t *, dirfs_node_t, + struct namecache *, struct vnode **, struct vattr *, int); +dirfs_node_t dirfs_findfd(dirfs_mount_t dmp, dirfs_node_t cur, + char **pathto, char **pathfree); +void dirfs_dropfd(dirfs_mount_t dmp, dirfs_node_t dnp1, char *pathfree); +char *dirfs_node_absolute_path(dirfs_mount_t, dirfs_node_t, char **); +char *dirfs_node_absolute_path_plus(dirfs_mount_t, dirfs_node_t, + char *, char **); +int dirfs_open_helper(dirfs_mount_t, dirfs_node_t, int, char *); +int dirfs_close_helper(dirfs_node_t); +int dirfs_node_refcnt(dirfs_node_t); +char *dirfs_flag2str(dirfs_node_t); +int dirfs_node_getperms(dirfs_node_t, int *, int *, int *); +int dirfs_node_chflags(dirfs_node_t, int, struct ucred *); +int dirfs_node_chtimes(dirfs_node_t); +int dirfs_node_chmod(dirfs_mount_t, dirfs_node_t, mode_t cur_mode); +int dirfs_node_chown(dirfs_mount_t, dirfs_node_t, + uid_t cur_uid, uid_t cur_gid, mode_t cur_mode); +int dirfs_node_chsize(dirfs_node_t, off_t); +void debug(int, const char *, ...); + +#endif /* 
_SYS_VFS_DIRFS_DIRFS_H_ */ diff --git a/sys/vfs/dirfs/dirfs_subr.c b/sys/vfs/dirfs/dirfs_subr.c new file mode 100644 index 0000000000..1a1d6d7a97 --- /dev/null +++ b/sys/vfs/dirfs/dirfs_subr.c @@ -0,0 +1,891 @@ +/* + * Copyright (c) 2013 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Antonio Huete Jimenez + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dirfs.h" + +/* + * Allocate and set up everything needed for the dirfs node to hold the filename. + * Note: dn_name is NUL-terminated. + */ +void +dirfs_node_setname(dirfs_node_t dnp, const char *name, int len) +{ + debug_called(); + + if (dnp->dn_name) + kfree(dnp->dn_name, M_DIRFS_MISC); + dnp->dn_name = kmalloc(len + 1, M_DIRFS_MISC, M_WAITOK | M_ZERO); + bcopy(name, dnp->dn_name, len); + dnp->dn_name[len] = 0; + dnp->dn_namelen = len; +} + +/* + * Allocate enough space to hold a dirfs node structure. + * Note: Node name and length are not handled here. + */ +dirfs_node_t +dirfs_node_alloc(struct mount *mp) +{ + dirfs_node_t dnp; + + debug_called(); + + dnp = kmalloc(sizeof(*dnp), M_DIRFS_NODE, M_WAITOK | M_ZERO); + lockinit(&dnp->dn_lock, "dfsnode", 0, LK_CANRECURSE); + + dnp->dn_fd = DIRFS_NOFD; + + return dnp; +} + +/* + * Drops a reference to the node. The node is freed on the last reference. + */ +void +dirfs_node_drop(dirfs_mount_t dmp, dirfs_node_t dnp) +{ + if (dirfs_node_unref(dnp)) + dirfs_node_free(dmp, dnp); +} + +/* + * Removes the association with its parent. Before freeing up its resources + * the node will be removed from the per-mount passive fd cache and its fd + * will be closed, either normally or forced. + */ +int +dirfs_node_free(dirfs_mount_t dmp, dirfs_node_t dnp) +{ + struct vnode *vp; + + debug_called(); + + KKASSERT(dnp != NULL); + debug_node2(dnp); + + KKASSERT(dirfs_node_refcnt(dnp) == 0); + + vp = NODE_TO_VP(dnp); + /* + * Remove the inode from the passive fds list + * as we are tearing down the node.
+ * Root inode will be removed on VOP_UNMOUNT() + */ + dirfs_mount_gettoken(dmp); + + if (dnp->dn_parent) { /* NULL when children reaped parents */ + dirfs_node_drop(dmp, dnp->dn_parent); + dnp->dn_parent = NULL; + } + dirfs_node_setpassive(dmp, dnp, 0); + if (dnp->dn_name) { + kfree(dnp->dn_name, M_DIRFS_MISC); + dnp->dn_name = NULL; + } + + /* + * The file descriptor should have been closed already by the + * previous call to dirfs_node_setpassive(). If not, force a sync and + * close it. + */ + if (dnp->dn_fd != DIRFS_NOFD) { + if (dnp->dn_vnode) + VOP_FSYNC(vp, MNT_WAIT, 0); + close(dnp->dn_fd); + dnp->dn_fd = DIRFS_NOFD; + } + + lockuninit(&dnp->dn_lock); + kfree(dnp, M_DIRFS_NODE); + dnp = NULL; + + dirfs_mount_reltoken(dmp); + + return 0; +} + +/* + * Do all the operations needed to get a resulting inode <--> host file + * association. This may or may not include opening the file, which should be + * only needed when creating it. + * + * In the case vap is not NULL and openflags are specified, open the file.
+ */ +int +dirfs_alloc_file(dirfs_mount_t dmp, dirfs_node_t *dnpp, dirfs_node_t pdnp, + struct namecache *ncp, struct vnode **vpp, struct vattr *vap, + int openflags) +{ + dirfs_node_t dnp; + dirfs_node_t pathnp; + struct vnode *vp; + struct mount *mp; + char *tmp; + char *pathfree; + int error; + + debug_called(); + + error = 0; + vp = NULL; + mp = DIRFS_TO_VFS(dmp); + + /* Sanity check */ + if (pdnp == NULL) + return EINVAL; + + dnp = dirfs_node_alloc(mp); + KKASSERT(dnp != NULL); + + dirfs_node_lock(dnp); + dirfs_node_setname(dnp, ncp->nc_name, ncp->nc_nlen); + dnp->dn_parent = pdnp; + dirfs_node_ref(pdnp); /* Children ref */ + dirfs_node_unlock(dnp); + + pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree); + + if (openflags && vap != NULL) { + dnp->dn_fd = openat(pathnp->dn_fd, tmp, + openflags, vap->va_mode); + if (dnp->dn_fd == -1) { + dirfs_dropfd(dmp, pathnp, pathfree); + return errno; + } + } + + error = dirfs_node_stat(pathnp->dn_fd, tmp, dnp); + if (error) { /* XXX Handle errors */ + error = errno; + if (vp) + dirfs_free_vp(dmp, dnp); + dirfs_node_free(dmp, dnp); + dirfs_dropfd(dmp, pathnp, pathfree); + return error; + } + + dirfs_alloc_vp(mp, &vp, LK_CANRECURSE, dnp); + *vpp = vp; + *dnpp = dnp; + + dbg(5, "tmp=%s dnp=%p allocated\n", tmp, dnp); + dirfs_dropfd(dmp, pathnp, pathfree); + + return error; +} + +/* + * Requires an already allocated dirfs_node_t that has already been lstat(2)'d + * for the type comparison + */ +void +dirfs_alloc_vp(struct mount *mp, struct vnode **vpp, int lkflags, + dirfs_node_t dnp) +{ + struct vnode *vp; + dirfs_mount_t dmp = VFS_TO_DIRFS(mp); + + debug_called(); + + /* + * Handle vnode reclaim/alloc races + */ + for (;;) { + vp = dnp->dn_vnode; + if (vp) { + if (vget(vp, LK_EXCLUSIVE) == 0) + break; /* success */ + /* vget raced a reclaim, retry */ + } else { + getnewvnode(VT_UNUSED10, mp, &vp, 0, lkflags); + if (dnp->dn_vnode == NULL) { + dnp->dn_vnode = vp; + vp->v_data = dnp; + vp->v_type = dnp->dn_type; + if (dmp->dm_root == dnp) +
vsetflags(vp, VROOT); + dirfs_node_ref(dnp); /* ref for dnp<->vp */ + + /* Type-specific initialization. */ + switch (dnp->dn_type) { + case VBLK: + case VCHR: + case VSOCK: + break; + case VREG: + vinitvmio(vp, dnp->dn_size, BMASK, -1); + break; + case VLNK: + break; + case VFIFO: + // vp->v_ops = &mp->mnt_vn_fifo_ops; + break; + case VDIR: + break; + default: + panic("dirfs_alloc_vp: dnp=%p vp=%p " + "type=%d", + dnp, vp, dnp->dn_type); + /* NOT REACHED */ + break; + } + break; /* success */ + } + vp->v_type = VBAD; + vx_put(vp); + /* multiple dirfs_alloc_vp calls raced, retry */ + } + } + KKASSERT(vp != NULL); + *vpp = vp; + dbg(5, "dnp=%p vp=%p type=%d\n", dnp, vp, vp->v_type); +} + +/* + * Do not call locked! + */ +void +dirfs_free_vp(dirfs_mount_t dmp, dirfs_node_t dnp) +{ + struct vnode *vp = NODE_TO_VP(dnp); + + dnp->dn_vnode = NULL; + vp->v_data = NULL; + dirfs_node_drop(dmp, dnp); +} + +int +dirfs_nodetype(struct stat *st) +{ + int ret; + mode_t mode = st->st_mode; + + debug_called(); + + if (S_ISDIR(mode)) + ret = VDIR; + else if (S_ISBLK(mode)) + ret = VBLK; + else if (S_ISCHR(mode)) + ret = VCHR; + else if (S_ISFIFO(mode)) + ret = VFIFO; + else if (S_ISSOCK(mode)) + ret = VSOCK; + else if (S_ISLNK(mode)) + ret = VLNK; + else if (S_ISREG(mode)) + ret = VREG; + else + ret = VBAD; + + return ret; +} + +int +dirfs_node_stat(int fd, const char *path, dirfs_node_t dnp) +{ + struct stat st; + int error; + + debug_called(); + if (fd == DIRFS_NOFD) + error = lstat(path, &st); + else + error = fstatat(fd, path, &st, AT_SYMLINK_NOFOLLOW); + + if (error) + return errno; + + /* Populate our dirfs node struct with stat data */ + dnp->dn_uid = st.st_uid; + dnp->dn_gid = st.st_gid; + dnp->dn_mode = st.st_mode; + dnp->dn_flags = st.st_flags; + dnp->dn_links = st.st_nlink; + dnp->dn_atime = st.st_atime; + dnp->dn_atimensec = (st.st_atime * 1000000000L); + dnp->dn_mtime = st.st_mtime; + dnp->dn_mtimensec = (st.st_mtime * 1000000000L); + dnp->dn_ctime = st.st_ctime; + 
dnp->dn_ctimensec = (st.st_ctime * 1000000000L); + dnp->dn_gen = st.st_gen; + dnp->dn_ino = st.st_ino; + dnp->dn_st_dev = st.st_dev; + dnp->dn_size = st.st_size; + dnp->dn_type = dirfs_nodetype(&st); + + return 0; +} + +char * +dirfs_node_absolute_path(dirfs_mount_t dmp, dirfs_node_t cur, char **pathfreep) +{ + return(dirfs_node_absolute_path_plus(dmp, cur, NULL, pathfreep)); +} + +char * +dirfs_node_absolute_path_plus(dirfs_mount_t dmp, dirfs_node_t cur, + char *last, char **pathfreep) +{ + size_t len; + dirfs_node_t dnp1; + char *buf; + int count; + + debug_called(); + + KKASSERT(dmp->dm_root); /* Sanity check */ + *pathfreep = NULL; + if (cur == NULL) + return NULL; + buf = kmalloc(MAXPATHLEN + 1, M_DIRFS_MISC, M_WAITOK); + + /* + * Passed-in trailing element. + */ + count = 0; + buf[MAXPATHLEN] = 0; + if (last) { + len = strlen(last); + count += len; + if (count <= MAXPATHLEN) + bcopy(last, &buf[MAXPATHLEN - count], len); + ++count; + if (count <= MAXPATHLEN) + buf[MAXPATHLEN - count] = '/'; + } + + /* + * Iterate through the parents until we hit the root. + */ + dnp1 = cur; + while (dirfs_node_isroot(dnp1) == 0) { + count += dnp1->dn_namelen; + if (count <= MAXPATHLEN) { + bcopy(dnp1->dn_name, &buf[MAXPATHLEN - count], + dnp1->dn_namelen); + } + ++count; + if (count <= MAXPATHLEN) + buf[MAXPATHLEN - count] = '/'; + dnp1 = dnp1->dn_parent; + if (dnp1 == NULL) + break; + } + + /* + * Prefix with the root mount path. If the element was unlinked + * dnp1 will be NULL and there is no path. 
+ */ + len = strlen(dmp->dm_path); + count += len; + if (dnp1 && count <= MAXPATHLEN) { + bcopy(dmp->dm_path, &buf[MAXPATHLEN - count], len); + *pathfreep = buf; + dbg(5, "absolute_path %s\n", &buf[MAXPATHLEN - count]); + return (&buf[MAXPATHLEN - count]); + } else { + kfree(buf, M_DIRFS_MISC); + *pathfreep = NULL; + return (NULL); + } +} + +/* + * Return a dirfs_node with a valid descriptor plus an allocated + * relative path which can be used in openat(), fstatat(), etc calls + * to locate the requested inode. + */ +dirfs_node_t +dirfs_findfd(dirfs_mount_t dmp, dirfs_node_t cur, + char **pathto, char **pathfreep) +{ + dirfs_node_t dnp1; + int count; + char *buf; + + debug_called(); + + *pathfreep = NULL; + *pathto = NULL; + + if (cur == NULL) + return NULL; + + buf = kmalloc(MAXPATHLEN + 1, M_DIRFS_MISC, M_WAITOK | M_ZERO); + count = 0; + + dnp1 = cur; + while (dnp1 == cur || dnp1->dn_fd == DIRFS_NOFD) { + count += dnp1->dn_namelen; + if (count <= MAXPATHLEN) { + bcopy(dnp1->dn_name, &buf[MAXPATHLEN - count], + dnp1->dn_namelen); + } + ++count; + if (count <= MAXPATHLEN) + buf[MAXPATHLEN - count] = '/'; + dnp1 = dnp1->dn_parent; + KKASSERT(dnp1 != NULL); + } + + if (dnp1 && count <= MAXPATHLEN) { + *pathfreep = buf; + *pathto = &buf[MAXPATHLEN - count + 1]; /* skip '/' prefix */ + dirfs_node_ref(dnp1); + dbg(5, "fd=%d dnp1=%p dnp1->dn_name=%d &buf[off]=%s\n", + dnp1->dn_fd, dnp1, dnp1->dn_name, *pathto); + } else { + dbg(5, "failed too long\n"); + kfree(buf, M_DIRFS_MISC); + *pathfreep = NULL; + *pathto = NULL; + dnp1 = NULL; + } + return (dnp1); +} + +void +dirfs_dropfd(dirfs_mount_t dmp, dirfs_node_t dnp1, char *pathfree) +{ + if (pathfree) + kfree(pathfree, M_DIRFS_MISC); + if (dnp1) + dirfs_node_drop(dmp, dnp1); +} + +int +dirfs_node_getperms(dirfs_node_t dnp, int *r, int *w, int *x) +{ + uid_t u; + gid_t g; + int isowner, isgroup; + + u = getuid(); /* XXX What about EUID? */ + g = getgid(); /* XXX What about EGID? 
*/ + isowner = (u == dnp->dn_uid); + isgroup = (g == dnp->dn_gid); + + if (r) { + if (isowner && (dnp->dn_mode & S_IRUSR)) + *r = 1; + else if (isgroup && (dnp->dn_mode & S_IRGRP)) + *r = 1; + else if (dnp->dn_mode & S_IROTH) + *r = 1; + } + + if (w) { + if (isowner && (dnp->dn_mode & S_IWUSR)) + *w = 1; + else if (isgroup && (dnp->dn_mode & S_IWGRP)) + *w = 1; + else if (dnp->dn_mode & S_IWOTH) + *w = 1; + } + + if (x) { + if (isowner && (dnp->dn_mode & S_IXUSR)) + *x = 1; + else if (isgroup && (dnp->dn_mode & S_IXGRP)) + *x = 1; + else if (dnp->dn_mode & S_IXOTH) + *x = 1; + } + + return 0; +} + +/* + * This requires an allocated node and vnode, otherwise it'll panic + */ +int +dirfs_open_helper(dirfs_mount_t dmp, dirfs_node_t dnp, int parentfd, + char *relpath) +{ + int canread, canwrite, canexec; + dirfs_node_t pathnp; + char *tmp; + char *pathfree; + int flags, error; + + debug_called(); + + canread = canwrite = canexec = 0; + flags = error = 0; + tmp = NULL; + + KKASSERT(dnp); + KKASSERT(dnp->dn_vnode); + + /* + * XXX Besides VDIR and VREG there are other file + * types, y'know? + * Also, O_RDWR alone might not be the best mode to open + * a file with, need to investigate which suits better. 
+ */ + dirfs_node_getperms(dnp, &canread, &canwrite, &canexec); + + if (dnp->dn_type & VDIR) { + flags |= O_DIRECTORY; + } else { + if (canwrite) + flags |= O_RDWR; + else + flags |= O_RDONLY; + } + if (relpath != NULL) { + tmp = relpath; + pathnp = NULL; + KKASSERT(parentfd != DIRFS_NOFD); + } else if (parentfd == DIRFS_NOFD) { + pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree); + parentfd = pathnp->dn_fd; + } else { + pathnp = NULL; + } + + dnp->dn_fd = openat(parentfd, tmp, flags); + if (dnp->dn_fd == -1) + error = errno; + + dbg(5, "dnp=%p tmp2=%s parentfd=%d flags=%d error=%d " + "r=%d w=%d x=%d\n", dnp, tmp, parentfd, flags, error, + canread, canwrite, canexec); + + if (pathnp) + dirfs_dropfd(dmp, pathnp, pathfree); + + return error; +} + +int +dirfs_close_helper(dirfs_node_t dnp) +{ + int error = 0; + + debug_called(); + + + if (dnp->dn_fd != DIRFS_NOFD) { + dbg(5, "closed fd on dnp=%p\n", dnp); +#if 0 + /* buffer cache buffers may still be present */ + error = close(dnp->dn_fd); /* XXX EINTR should be checked */ + dnp->dn_fd = DIRFS_NOFD; +#endif + } + + return error; +} + +int +dirfs_node_refcnt(dirfs_node_t dnp) +{ + return dnp->dn_refcnt; +} + +int +dirfs_node_chtimes(dirfs_node_t dnp) +{ + struct vnode *vp; + dirfs_mount_t dmp; + int error = 0; + char *tmp; + char *pathfree; + + debug_called(); + + vp = NODE_TO_VP(dnp); + dmp = VFS_TO_DIRFS(vp->v_mount); + + KKASSERT(vn_islocked(vp)); + + if (dnp->dn_flags & (IMMUTABLE | APPEND)) + return EPERM; + + tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree); + KKASSERT(tmp); + if((lutimes(tmp, NULL)) == -1) + error = errno; + + dirfs_node_stat(DIRFS_NOFD, tmp, dnp); + dirfs_dropfd(dmp, NULL, pathfree); + + KKASSERT(vn_islocked(vp)); + + + return error; +} + +int +dirfs_node_chflags(dirfs_node_t dnp, int vaflags, struct ucred *cred) +{ + struct vnode *vp; + dirfs_mount_t dmp; + int error = 0; + int flags; + char *tmp; + char *pathfree; + + debug_called(); + + vp = NODE_TO_VP(dnp); + dmp = 
VFS_TO_DIRFS(vp->v_mount); + + KKASSERT(vn_islocked(vp)); + + flags = dnp->dn_flags; + + error = vop_helper_setattr_flags(&flags, vaflags, dnp->dn_uid, cred); + /* + * When running vkernels with non-root it is not possible to set + * certain flags on host files, such as SF* flags. chflags(2) call + * will spit an error in that case. + */ + if (error == 0) { + tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree); + KKASSERT(tmp); + if((lchflags(tmp, flags)) == -1) + error = errno; + dirfs_node_stat(DIRFS_NOFD, tmp, dnp); + dirfs_dropfd(dmp, NULL, pathfree); + } + + KKASSERT(vn_islocked(vp)); + + return error; +} + +int +dirfs_node_chmod(dirfs_mount_t dmp, dirfs_node_t dnp, mode_t mode) +{ + char *tmp; + char *pathfree; + int error = 0; + + tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree); + KKASSERT(tmp); + if (lchmod(tmp, mode) < 0) + error = errno; + dirfs_node_stat(DIRFS_NOFD, tmp, dnp); + dirfs_dropfd(dmp, NULL, pathfree); + + return error; +} + +int +dirfs_node_chown(dirfs_mount_t dmp, dirfs_node_t dnp, + uid_t uid, uid_t gid, mode_t mode) +{ + char *tmp; + char *pathfree; + int error = 0; + + tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree); + KKASSERT(tmp); + if (lchown(tmp, uid, gid) < 0) + error = errno; + if (mode != dnp->dn_mode) + lchmod(tmp, mode); + dirfs_node_stat(DIRFS_NOFD, tmp, dnp); + dirfs_dropfd(dmp, NULL, pathfree); + + return error; +} + + +int +dirfs_node_chsize(dirfs_node_t dnp, off_t nsize) +{ + dirfs_mount_t dmp; + struct vnode *vp; + int error = 0; + char *tmp; + char *pathfree; + off_t osize; + int biosize; + + debug_called(); + + KKASSERT(dnp); + + vp = NODE_TO_VP(dnp); + dmp = VFS_TO_DIRFS(vp->v_mount); + biosize = BSIZE; + osize = dnp->dn_size; + + KKASSERT(vn_islocked(vp)); + + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VREG: + break; + default: + return (EOPNOTSUPP); + + } + + tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree); + if (nsize < osize) { + error = nvtruncbuf(vp, nsize, biosize, -1, 0); + } else { 
+ error = nvextendbuf(vp, osize, nsize, + biosize, biosize, + -1, -1, 0); + } + if (error == 0 && truncate(tmp, nsize) < 0) + error = errno; + if (error == 0) + dnp->dn_size = nsize; + dbg(5, "TRUNCATE %016jx %016jx\n", (intmax_t)nsize, dnp->dn_size); + /*dirfs_node_stat(DIRFS_NOFD, tmp, dnp); don't need to do this*/ + + dirfs_dropfd(dmp, NULL, pathfree); + + + KKASSERT(vn_islocked(vp)); + + return error; +} + +void +dirfs_node_setpassive(dirfs_mount_t dmp, dirfs_node_t dnp, int state) +{ + struct vnode *vp; + + if (state && (dnp->dn_state & DIRFS_PASVFD) == 0 && + dnp->dn_fd != DIRFS_NOFD) { + dirfs_node_ref(dnp); + dirfs_node_setflags(dnp, DIRFS_PASVFD); + TAILQ_INSERT_TAIL(&dmp->dm_fdlist, dnp, dn_fdentry); + ++dirfs_fd_used; + ++dmp->dm_fd_used; + + /* + * If we are over our limit remove nodes from the + * passive fd cache. + */ + while (dmp->dm_fd_used > dirfs_fd_limit) { + dnp = TAILQ_FIRST(&dmp->dm_fdlist); + dirfs_node_setpassive(dmp, dnp, 0); + } + } + if (state == 0 && (dnp->dn_state & DIRFS_PASVFD)) { + dirfs_node_clrflags(dnp, DIRFS_PASVFD); + TAILQ_REMOVE(&dmp->dm_fdlist, dnp, dn_fdentry); + --dirfs_fd_used; + --dmp->dm_fd_used; + dbg(5, "dnp=%p removed from fdlist. %d used\n", + dnp, dirfs_fd_used); + + /* + * Attempt to close the descriptor. We can only do this + * if the related vnode is inactive and has exactly two + * refs (representing the vp<->dnp and PASVFD). Otherwise + * someone might have ref'd the node in order to use the + * dn_fd. + * + * Also, if the vnode is in any way dirty we leave the fd + * open for the buffer cache code. The syncer will eventually + * come along and fsync the vnode, and the next inactive + * transition will deal with the descriptor. + * + * The descriptor for the root node is NEVER closed by + * this function. 
+ */ + vp = dnp->dn_vnode; + if (dirfs_node_refcnt(dnp) == 2 && vp && + dnp->dn_fd != DIRFS_NOFD && + !dirfs_node_isroot(dnp) && + (vp->v_flag & (VINACTIVE|VOBJDIRTY)) == VINACTIVE && + RB_EMPTY(&vp->v_rbdirty_tree)) { + dbg(5, "passive cache: closing %d\n", dnp->dn_fd); + close(dnp->dn_fd); + dnp->dn_fd = DIRFS_NOFD; + } else { + if (dirfs_node_refcnt(dnp) == 1 && dnp->dn_vnode == NULL && + dnp->dn_fd != DIRFS_NOFD && + dnp != dmp->dm_root) { + dbg(5, "passive cache: closing %d\n", dnp->dn_fd); + close(dnp->dn_fd); + dnp->dn_fd = DIRFS_NOFD; + } + } + dirfs_node_drop(dmp, dnp); + } +} + +char * +dirfs_flag2str(dirfs_node_t dnp) +{ + const char *txtflg[] = { DIRFS_TXTFLG }; + static char str[512] = {0}; + + if (dnp->dn_state & DIRFS_PASVFD) + ksprintf(str, "%s ", txtflg[0]); + + return str; +} + +void +debug(int level, const char *fmt, ...) +{ + __va_list ap; + + if (debuglvl >= level) { + __va_start(ap, fmt); + kvprintf(fmt, ap); + __va_end(ap); + } +} + diff --git a/sys/vfs/dirfs/dirfs_vfsops.c b/sys/vfs/dirfs/dirfs_vfsops.c new file mode 100644 index 0000000000..a09c1de451 --- /dev/null +++ b/sys/vfs/dirfs/dirfs_vfsops.c @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2013 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Antonio Huete Jimenez + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. 
Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "dirfs.h" + +MALLOC_DEFINE(M_DIRFS, "dirfs", "dirfs mount allocation"); +MALLOC_DEFINE(M_DIRFS_NODE, "dirfs nodes", "dirfs nodes memory allocation"); +MALLOC_DEFINE(M_DIRFS_MISC, "dirfs misc", "dirfs miscellaneous allocation"); + +/* + * Kernel tracing facilities + */ +KTR_INFO_MASTER(dirfs); + +KTR_INFO(KTR_DIRFS, dirfs, root, 31, + "DIRFS(root dnp=%p vnode=%p hostdir=%s fd=%d error=%d)", + dirfs_node_t dnp, struct vnode *vp, char *hostdir, int fd, int error); + +/* System wide sysctl stuff */ +int debuglvl = 2; +int dirfs_fd_limit = 100; +int dirfs_fd_used = 0; +long passive_fd_list_miss = 0; +long passive_fd_list_hits = 0; + +SYSCTL_NODE(_vfs, OID_AUTO, dirfs, CTLFLAG_RW, 0, + "dirfs filesystem for vkernels"); +SYSCTL_INT(_vfs_dirfs, OID_AUTO, debug, CTLFLAG_RW, + &debuglvl, 0, "dirfs debug level"); +SYSCTL_INT(_vfs_dirfs, OID_AUTO, fd_limit, CTLFLAG_RW, + &dirfs_fd_limit, 0, "Maximum number of passive nodes to cache"); +SYSCTL_INT(_vfs_dirfs, OID_AUTO, fd_used, CTLFLAG_RD, + &dirfs_fd_used, 0, "Current number of passive nodes cached"); +SYSCTL_LONG(_vfs_dirfs, OID_AUTO, passive_fd_list_miss, CTLFLAG_RD, + &passive_fd_list_miss, 0, "Passive fd list cache misses"); +SYSCTL_LONG(_vfs_dirfs, OID_AUTO, passive_fd_list_hits, CTLFLAG_RD, + &passive_fd_list_hits, 0, "Passive fd list cache misses"); + +static int dirfs_statfs(struct mount *, struct statfs *, struct ucred *); + +static int +dirfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) +{ + dirfs_mount_t dmp; + struct stat st; + size_t done, nlen; + int error; + + debug_called(); + + if (mp->mnt_flag & MNT_UPDATE) { + dmp = VFS_TO_DIRFS(mp); + if (dmp->dm_rdonly == 0 && (mp->mnt_flag & MNT_RDONLY)) { + /* XXX We should make sure all writes are synced */ + dmp->dm_rdonly = 1; + debug(2, "dirfs read-write -> read-only\n"); + } + + 
if (dmp->dm_rdonly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { + debug(2, "dirfs read-only -> read-write\n"); + dmp->dm_rdonly = 0; + } + return 0; + } + + dmp = kmalloc(sizeof(*dmp), M_DIRFS, M_WAITOK | M_ZERO); + mp->mnt_data = (qaddr_t)dmp; + dmp->dm_mount = mp; + + error = copyinstr(data, &dmp->dm_path, MAXPATHLEN, &done); + if (error) { + /* Attempt to copy from kernel address */ + error = copystr(data, &dmp->dm_path, MAXPATHLEN, &done); + if (error) { + kfree(dmp, M_DIRFS); + return error; + } + } + + /* Strip / character at the end to avoid problems */ + nlen = strnlen(dmp->dm_path, MAXPATHLEN); + if (dmp->dm_path[nlen-1] == '/') + dmp->dm_path[nlen-1] = 0; + + /* Make sure host directory exists and it is indeed a directory. */ + if ((stat(dmp->dm_path, &st)) == 0) { + if (!S_ISDIR(st.st_mode)) { + kfree(dmp, M_DIRFS); + return EINVAL; + } + } else { + return errno; + } + + lockinit(&dmp->dm_lock, "dfsmnt", 0, LK_CANRECURSE); + + vfs_add_vnodeops(mp, &dirfs_vnode_vops, &mp->mnt_vn_norm_ops); + vfs_getnewfsid(mp); + + TAILQ_INIT(&dmp->dm_fdlist); + RB_INIT(&dmp->dm_inotree); + + kmalloc_raise_limit(M_DIRFS_NODE, 0); + + dirfs_statfs(mp, &mp->mnt_stat, cred); + + dbg(5, "%s mounted. dmp=%p mp=%p\n", dmp->dm_path, dmp, mp); + + return 0; +} + +static int +dirfs_unmount(struct mount *mp, int mntflags) +{ + dirfs_mount_t dmp; + dirfs_node_t dnp; + int cnt; + int error; + + debug_called(); + cnt = 0; + dmp = VFS_TO_DIRFS(mp); + + error = vflush(mp, 0, 0); + if (error) + return error; + + /* + * Clean up dm_fdlist. There should be no vnodes left so the + * only ref should be from the fdlist. 
+ */ + while ((dnp = TAILQ_FIRST(&dmp->dm_fdlist)) != NULL) { + dirfs_node_setpassive(dmp, dnp, 0); + } + + /* + * Cleanup root node + */ + dnp = dmp->dm_root; + dirfs_close_helper(dnp); + debug_node2(dnp); + dirfs_node_drop(dmp, dnp); /* last ref should free structure */ + + kfree(dmp, M_DIRFS); + mp->mnt_data = (qaddr_t) 0; + + dbg(5, "dirfs umounted successfully\n"); + + return 0; +} + +static int +dirfs_root(struct mount *mp, struct vnode **vpp) +{ + dirfs_mount_t dmp; + dirfs_node_t dnp; + int fd; + int error; + + debug_called(); + + dmp = VFS_TO_DIRFS(mp); + KKASSERT(dmp != NULL); + + if (dmp->dm_root == NULL) { + /* + * dm_root holds the root dirfs node. Allocate a new one since + * there is none. Also attempt to lstat(2) it, in order to set + * data for VOP_ACCESS() + */ + dnp = dirfs_node_alloc(mp); + error = dirfs_node_stat(DIRFS_NOFD, dmp->dm_path, dnp); + if (error != 0) { + dirfs_node_free(dmp, dnp); + return error; + } + dirfs_node_ref(dnp); /* leave inact for life of mount */ + + /* Root inode's parent is NULL, used for verification */ + dnp->dn_parent = NULL; + dmp->dm_root = dnp; + dirfs_node_setflags(dnp, DIRFS_ROOT); + + /* + * Maintain an open descriptor on the root dnp. The + * normal open/close/cache does not apply for the root + * so the descriptor is ALWAYS available. + */ + fd = open(dmp->dm_path, O_DIRECTORY); + if (fd == -1) { + dbg(5, "failed to open ROOT node\n"); + dirfs_free_vp(dmp, dnp); + dirfs_node_free(dmp, dnp); + return errno; + } + dnp->dn_fd = fd; + dnp->dn_type = VDIR; + } else { + dnp = dmp->dm_root; + } + + /* + * Acquire the root vnode (dn_type already set above). This + * call will handle any races and return a locked vnode. 
+ */ + dirfs_alloc_vp(mp, vpp, LK_CANRECURSE, dnp); + KTR_LOG(dirfs_root, dnp, *vpp, dmp->dm_path, dnp->dn_fd, error); + + return 0; +} + +static int +dirfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, struct vnode **vpp) +{ + debug_called(); + + return EOPNOTSUPP; +} + +static int +dirfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) +{ + dirfs_mount_t dmp = VFS_TO_DIRFS(mp); + struct statfs st; + + debug_called(); + + if((statfs(dmp->dm_path, &st)) == -1) + return errno; + + ksnprintf(st.f_mntfromname, MNAMELEN - 1, "dirfs@%s", dmp->dm_path); + bcopy(&st, sbp, sizeof(st)); + strlcpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); + dbg(5, "iosize = %zd\n", sbp->f_iosize); + + return 0; +} + +static int +dirfs_vptofh(struct vnode *vp, struct fid *fhp) +{ + dirfs_node_t dnp; + + dnp = VP_TO_NODE(vp); + debug_node2(dnp); + debug_called(); + + return EOPNOTSUPP; +} + +static int +dirfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp, + struct ucred **credanonp) +{ + debug_called(); + + return EOPNOTSUPP; +} + +static struct vfsops dirfs_vfsops = { + .vfs_mount = dirfs_mount, + .vfs_unmount = dirfs_unmount, + .vfs_root = dirfs_root, + .vfs_vget = vfs_stdvget, + .vfs_statfs = dirfs_statfs, + .vfs_fhtovp = dirfs_fhtovp, + .vfs_vptofh = dirfs_vptofh, + .vfs_sync = vfs_stdsync, + .vfs_checkexp = dirfs_checkexp +}; + +VFS_SET(dirfs_vfsops, dirfs, 0); +MODULE_VERSION(dirfs, 1); diff --git a/sys/vfs/dirfs/dirfs_vnops.c b/sys/vfs/dirfs/dirfs_vnops.c new file mode 100644 index 0000000000..f060f4113d --- /dev/null +++ b/sys/vfs/dirfs/dirfs_vnops.c @@ -0,0 +1,1500 @@ +/* + * Copyright (c) 2013 The DragonFly Project. All rights reserved. 
+ * + * This code is derived from software contributed to The DragonFly Project + * by Antonio Huete Jimenez + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +/* + * See below a small table with the vnode operation and syscall correspondence + * where it applies: + * + * VNODE OP SCALL SCALL_AT FD PATH COMMENTS + * dirfs_ncreate Y Y Y Y open(2), openat(2) + * dirfs_nresolve - - - Y no syscall needed + * dirfs_nlookupdot - - - - - + * dirfs_nmknod Y Y Y Y mknod(2), mknodat(2) + * dirfs_open Y Y Y Y open(2), openat(2) + * dirfs_close Y Y Y Y close(2) + * dirfs_access - - - - data from stat(2) + * dirfs_getattr Y Y Y Y lstat(2), fstat(2), fstatat(2) + * dirfs_setattr - - - - - + * dirfs_read Y - Y - read(2). relies on bufcache + * dirfs_write Y - Y - write(2). relies on bufcache + * dirfs_fsync Y - Y - fsync(2) + * dirfs_mountctl - - - - - + * dirfs_nremove Y - - Y unlink(2) + * dirfs_nlink - - - - - + * dirfs_nrename Y Y Y Y rename(2), renameat(2) + * dirfs_nmkdir Y Y Y Y mkdir(2), mkdirat(2) + * dirfs_nrmdir Y - - Y rmdir(2) + * dirfs_nsymlink Y Y Y Y symlink(2), symlinkat(2) + * dirfs_readdir Y - Y - getdirentries(2) + * dirfs_readlink Y Y Y Y readlink(2), readlinkat(2) + * dirfs_inactive - - - - - + * dirfs_reclaim - - - - - + * dirfs_print - - - - - + * dirfs_pathconf - - - - - + * dirfs_bmap - - - - - + * dirfs_strategy Y - Y - pwrite(2), pread(2) + * dirfs_advlock - - - - - + * dirfs_kqfilter - - - - - + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dirfs.h" + +/* + * Kernel tracing facilities + */ +KTR_INFO_MASTER_EXTERN(dirfs); + +KTR_INFO(KTR_DIRFS, dirfs, unsupported, 0, + "DIRFS(func=%s)", + const char *func); + +KTR_INFO(KTR_DIRFS, dirfs, nresolve, 0, + "DIRFS(dnp=%p ncp_name=%s parent=%p pfd=%d error=%d)", + dirfs_node_t dnp, char *name, dirfs_node_t pdnp, int pfd, int error); + +KTR_INFO(KTR_DIRFS, dirfs, ncreate, 1, + "DIRFS(dnp=%p ncp_name=%s parent=%p pfd=%d error=%d)", + dirfs_node_t dnp, char *name, dirfs_node_t pdnp, int pfd, int error); + 
+KTR_INFO(KTR_DIRFS, dirfs, open, 2,
+    "DIRFS(dnp=%p newfd?=%s)",
+    dirfs_node_t dnp, char *isnew);
+
+KTR_INFO(KTR_DIRFS, dirfs, close, 3,
+    "DIRFS(dnp=%p fd=%d vfsync error=%d)",
+    dirfs_node_t dnp, int fd, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, readdir, 4,
+    "DIRFS(dnp=%p fd=%d startoff=%jd uio_offset=%jd)",
+    dirfs_node_t dnp, int fd, off_t startoff, off_t uoff);
+
+KTR_INFO(KTR_DIRFS, dirfs, access, 5,
+    "DIRFS(dnp=%p error=%d)",
+    dirfs_node_t dnp, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, getattr, 6,
+    "DIRFS(dnp=%p error=%d)",
+    dirfs_node_t dnp, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, setattr, 7,
+    "DIRFS(dnp=%p action=%s error=%d)",
+    dirfs_node_t dnp, const char *action, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, fsync, 8,
+    "DIRFS(dnp=%p error=%d)",
+    dirfs_node_t dnp, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, read, 9,
+    "DIRFS(dnp=%p size=%jd error=%d)",
+    dirfs_node_t dnp, size_t size, int error);
+
+/*
+ * The labels follow the order in which dirfs_write() logs its values:
+ * base offset, remaining uio bytes, node size, error.
+ */
+KTR_INFO(KTR_DIRFS, dirfs, write, 10,
+    "DIRFS(dnp=%p boff=%jd uio_resid=%jd size=%jd error=%d)",
+    dirfs_node_t dnp, off_t boff, size_t resid, size_t size, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, strategy, 11,
+    "DIRFS(dnp=%p dnp_size=%jd iosize=%jd b_cmd=%d b_error=%d "
+    "b_resid=%d bio_off=%jd error=%d)",
+    dirfs_node_t dnp, size_t size, size_t iosize, int cmd, int berror,
+    int bresid, off_t biooff, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, nremove, 12,
+    "DIRFS(dnp=%p pdnp=%p error=%d)",
+    dirfs_node_t dnp, dirfs_node_t pdnp, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, nmkdir, 13,
+    "DIRFS(pdnp=%p dnp=%p nc_name=%p error=%d)",
+    dirfs_node_t dnp, dirfs_node_t pdnp, char *n, int error);
+
+/* NOTE(review): shares mask value 13 with nmkdir above -- confirm intended. */
+KTR_INFO(KTR_DIRFS, dirfs, nrmdir, 13,
+    "DIRFS(pdnp=%p dnp=%p error=%d)",
+    dirfs_node_t dnp, dirfs_node_t pdnp, int error);
+
+KTR_INFO(KTR_DIRFS, dirfs, nsymlink, 14,
+    "DIRFS(dnp=%p target=%s symlink=%s error=%d)",
+    dirfs_node_t dnp, char *tgt, char *lnk, int error);
+
+/* Needed prototypes */
+int dirfs_access(struct vop_access_args *);
+int dirfs_getattr(struct vop_getattr_args *);
+int dirfs_setattr(struct vop_setattr_args *);
+int dirfs_reclaim(struct vop_reclaim_args *);
+
+/*
+ * Resolve a namecache entry.  The mount's passive fd list is searched
+ * first for a node with the same parent and name; on a miss a new node
+ * and vnode are set up through dirfs_alloc_file().
+ */
+static int
+dirfs_nresolve(struct vop_nresolve_args *ap)
+{
+	dirfs_node_t pdnp, dnp, d1, d2;
+	dirfs_mount_t dmp;
+	struct namecache *ncp;
+	struct nchandle *nch;
+	struct vnode *dvp;
+	struct vnode *vp;
+	struct mount *mp;
+	int error;
+
+	debug_called();
+
+	error = 0;
+	nch = ap->a_nch;
+	ncp = nch->ncp;
+	mp = nch->mount;
+	dvp = ap->a_dvp;
+	vp = NULL;
+	dnp = d1 = d2 = NULL;
+	pdnp = VP_TO_NODE(dvp);
+	dmp = VFS_TO_DIRFS(mp);
+
+	dirfs_node_lock(pdnp);
+	TAILQ_FOREACH_MUTABLE(d1, &dmp->dm_fdlist, dn_fdentry, d2) {
+		if (d1->dn_parent == pdnp &&
+		    (strcmp(d1->dn_name, ncp->nc_name) == 0)) {
+			dnp = d1;
+			dirfs_node_ref(dnp);
+			passive_fd_list_hits++;
+			break;
+		}
+	}
+	dirfs_node_unlock(pdnp);
+
+	if (dnp) {
+		dirfs_alloc_vp(mp, &vp, LK_CANRECURSE, dnp);
+		dirfs_node_drop(dmp, dnp);
+	} else {
+		passive_fd_list_miss++;
+		error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, &vp, NULL, 0);
+	}
+
+	if (vp) {
+		if (error && error == ENOENT) {
+			cache_setvp(nch, NULL);
+		} else {
+			vn_unlock(vp);
+			cache_setvp(nch, vp);
+			vrele(vp);
+		}
+	}
+
+	KTR_LOG(dirfs_nresolve, dnp, ncp->nc_name, pdnp, pdnp->dn_fd, error);
+
+	return error;
+}
+
+/* Not supported: always returns EOPNOTSUPP. */
+static int
+dirfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
+{
+	debug_called();
+
+	KTR_LOG(dirfs_unsupported, __func__);
+
+	return EOPNOTSUPP;
+}
+
+/*
+ * Create a regular file in the directory ap->a_dvp.
+ *
+ * Returns EPERM when dirfs_node_getperms() reports the parent as not
+ * writable; otherwise returns the result of dirfs_alloc_file().  On
+ * success the namecache entry is bound to the new vnode.
+ */
+static int
+dirfs_ncreate(struct vop_ncreate_args *ap)
+{
+	dirfs_node_t pdnp;
+	dirfs_node_t dnp;
+	dirfs_mount_t dmp;
+	struct namecache *ncp;
+	struct vnode *dvp;
+	struct vnode **vpp;
+	struct vattr *vap;
+	int canwrite = 0;
+	int error;
+
+	debug_called();
+
+	error = 0;
+	dnp = NULL;
+	dvp = ap->a_dvp;
+	pdnp = VP_TO_NODE(dvp);
+	dmp = VFS_TO_DIRFS(dvp->v_mount);
+	vap = ap->a_vap;
+	ncp = ap->a_nch->ncp;
+	vpp = ap->a_vpp;
+
+	dirfs_mount_gettoken(dmp);
+
+	dirfs_node_getperms(pdnp, NULL, &canwrite, NULL);
+	if (!canwrite) {
+		/* Do not let the allocation below overwrite this error. */
+		error = EPERM;
+	} else {
+		error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, vpp, vap,
+		    (O_CREAT | O_RDWR));
+
+		if (error == 0) {
+			cache_setunresolved(ap->a_nch);
+			cache_setvp(ap->a_nch, *vpp);
+		}
+	}
+
+	dirfs_mount_reltoken(dmp);
+
+	KTR_LOG(dirfs_ncreate, dnp, ncp->nc_name, pdnp, pdnp->dn_fd, error);
+
+	return error;
+}
+
+/* Not supported: always returns EOPNOTSUPP. */
+static int
+dirfs_nmknod(struct vop_nmknod_args *v)
+{
+	debug_called();
+
+	return EOPNOTSUPP;
+}
+
+/*
+ * Open a vnode.  A non-root node that has no descriptor yet gets one
+ * through dirfs_open_helper() before the standard open runs.
+ */
+static int
+dirfs_open(struct vop_open_args *ap)
+{
+	dirfs_node_t dnp;
+	dirfs_mount_t dmp;
+	struct vnode *vp;
+	int error;
+	int ofd, nfd;
+
+	debug_called();
+
+	vp = ap->a_vp;
+	dnp = VP_TO_NODE(vp);
+	dmp = VFS_TO_DIRFS(vp->v_mount);
+	error = 0;
+	ofd = nfd = dnp->dn_fd;
+
+	/*
+	 * Root inode has been allocated and opened in VFS_ROOT() so
+	 * no reason to attempt to open it again.
+	 */
+	if (dmp->dm_root != dnp && dnp->dn_fd == DIRFS_NOFD) {
+		error = dirfs_open_helper(dmp, dnp, DIRFS_NOFD, NULL);
+		if (error)
+			return error;
+		nfd = dnp->dn_fd;
+	}
+
+	KTR_LOG(dirfs_open, dnp, (ofd != nfd) ? "true" : "false");
+
+	return vop_stdopen(ap);
+}
+
+/*
+ * Close a vnode.  Regular files are flushed with vfsync() first; a
+ * flush error is only logged, the return value is vop_stdclose()'s.
+ */
+static int
+dirfs_close(struct vop_close_args *ap)
+{
+	struct vnode *vp;
+	dirfs_node_t dnp;
+	int error;
+
+	debug_called();
+
+	error = 0;
+	vp = ap->a_vp;
+	dnp = VP_TO_NODE(vp);
+
+	if (vp->v_type == VREG) {
+		error = vfsync(vp, 0, 1, NULL, NULL);
+		if (error)
+			dbg(5, "vfsync error=%d\n", error);
+	}
+
+	KTR_LOG(dirfs_close, dnp, dnp->dn_fd, error);
+
+	return vop_stdclose(ap);
+}
+
+/*
+ * Access check.  Write access to directories, symlinks and regular
+ * files on a read-only mount yields EROFS, unknown vnode types yield
+ * EINVAL; everything else is decided by vop_helper_access() using the
+ * node's cached uid, gid and mode.
+ */
+int
+dirfs_access(struct vop_access_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	int error;
+	dirfs_node_t dnp;
+
+	debug_called();
+
+	dnp = VP_TO_NODE(vp);
+
+	switch (vp->v_type) {
+	case VDIR:
+		/* FALLTHROUGH */
+	case VLNK:
+		/* FALLTHROUGH */
+	case VREG:
+		if ((ap->a_mode & VWRITE) &&
+		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+			error = EROFS;
+			goto out;
+		}
+		break;
+	case VBLK:
+		/* FALLTHROUGH */
+	case VCHR:
+		/* FALLTHROUGH */
+	case VSOCK:
+		/* FALLTHROUGH */
+	case VFIFO:
+		break;
+
+	default:
+		error = EINVAL;
+		goto out;
+	}
+
+	error = vop_helper_access(ap, dnp->dn_uid,
+	    dnp->dn_gid, dnp->dn_mode, 0);
+
+out:
+	KTR_LOG(dirfs_access, dnp, error);
+
+	return error;
+}
+
+/*
+ * Refresh the node through dirfs_node_stat() and copy its attributes
+ * into ap->a_vap.  Returns the stat error, in which case *a_vap is
+ * left untouched.
+ */
+int
+dirfs_getattr(struct vop_getattr_args *ap)
+{
+	dirfs_mount_t dmp;
+	dirfs_node_t dnp;
+	dirfs_node_t pathnp;
+	struct vnode *vp;
+	struct vattr *vap;
+	char *tmp;
+	char *pathfree;
+	int error;
+
+	debug_called();
+
+	vp = ap->a_vp;
+	vap = ap->a_vap;
+	dnp = VP_TO_NODE(vp);
+	dmp = VFS_TO_DIRFS(vp->v_mount);
+
+	KKASSERT(dnp);	/* This must not happen */
+
+	if (!dirfs_node_isroot(dnp)) {
+		pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
+
+		KKASSERT(pathnp->dn_fd != DIRFS_NOFD);
+
+		error = dirfs_node_stat(pathnp->dn_fd, tmp, dnp);
+		dirfs_dropfd(dmp, pathnp, pathfree);
+	} else {
+		error = dirfs_node_stat(DIRFS_NOFD, dmp->dm_path, dnp);
+	}
+
+	if (error == 0) {
+		dirfs_node_lock(dnp);
+		vap->va_nlink = dnp->dn_links;
+		vap->va_type = dnp->dn_type;
+		vap->va_mode = dnp->dn_mode;
+		vap->va_uid = dnp->dn_uid;
+		vap->va_gid = dnp->dn_gid;
+		vap->va_fileid = dnp->dn_ino;
+		vap->va_size = dnp->dn_size;
+		vap->va_blocksize = dnp->dn_blocksize;
+		vap->va_atime.tv_sec = dnp->dn_atime;
+		vap->va_atime.tv_nsec = dnp->dn_atimensec;
+		vap->va_mtime.tv_sec = dnp->dn_mtime;
+		vap->va_mtime.tv_nsec = dnp->dn_mtimensec;
+		vap->va_ctime.tv_sec = dnp->dn_ctime;
+		vap->va_ctime.tv_nsec = dnp->dn_ctimensec;
+		vap->va_bytes = dnp->dn_size;
+		vap->va_gen = dnp->dn_gen;
+		vap->va_flags = dnp->dn_flags;
+		vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+		dirfs_node_unlock(dnp);
+	}
+
+	KTR_LOG(dirfs_getattr, dnp, error);
+
+	/* Report a failed stat instead of claiming *vap was filled in. */
+	return error;
+}
+
+/*
+ * Change attributes.  Exactly one kind of change is applied per call,
+ * checked in this order: flags, size, owner/group, mode, times.  Any
+ * change on a read-only mount yields EROFS; a request to set an
+ * unsettable attribute yields EINVAL.
+ */
+int
+dirfs_setattr(struct vop_setattr_args *ap)
+{
+	dirfs_mount_t dmp;
+	dirfs_node_t dnp;
+	struct vnode *vp;
+	struct vattr *vap;
+	struct ucred *cred;
+	int error;
+#ifdef KTR
+	const char *msg[6] = {
+		"invalid",
+		"chflags",
+		"chsize",
+		"chown",
+		"chmod",
+		"chtimes"
+	};
+#endif
+	int msgno;
+
+	debug_called();
+
+	error = msgno = 0;
+	vp = ap->a_vp;
+	vap = ap->a_vap;
+	cred = ap->a_cred;
+	dnp = VP_TO_NODE(vp);
+	dmp = VFS_TO_DIRFS(vp->v_mount);
+
+	dirfs_mount_gettoken(dmp);
+
+	/*
+	 * Check for unsettable attributes.
+	 */
+	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+	    (vap->va_blocksize != VNOVAL) || (vap->va_rmajor != VNOVAL) ||
+	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+		msgno = 0;
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Change file flags
+	 */
+	if (error == 0 && (vap->va_flags != VNOVAL)) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			error = EROFS;
+		else
+			error = dirfs_node_chflags(dnp, vap->va_flags, cred);
+		msgno = 1;
+		goto out;
+	}
+
+	/*
+	 * Extend or truncate a file
+	 */
+	if (error == 0 && (vap->va_size != VNOVAL)) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			error = EROFS;
+		else
+			error = dirfs_node_chsize(dnp, vap->va_size);
+		dbg(2, "dnp size=%jd vap size=%jd\n", dnp->dn_size, vap->va_size);
+		msgno = 2;
+		goto out;
+	}
+
+	/*
+	 * Change file owner or group
+	 */
+	if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
+	    vap->va_gid != (gid_t)VNOVAL)) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
+			error = EROFS;
+		} else {
+			mode_t cur_mode = dnp->dn_mode;
+			uid_t cur_uid = dnp->dn_uid;
+			gid_t cur_gid = dnp->dn_gid;
+
+			error = vop_helper_chown(ap->a_vp, vap->va_uid,
+			    vap->va_gid, ap->a_cred,
+			    &cur_uid, &cur_gid, &cur_mode);
+			if (error == 0 &&
+			    (cur_mode != dnp->dn_mode ||
+			     cur_uid != dnp->dn_uid ||
+			     cur_gid != dnp->dn_gid)) {
+				error = dirfs_node_chown(dmp, dnp, cur_uid,
+				    cur_gid, cur_mode);
+			}
+		}
+		msgno = 3;
+		goto out;
+	}
+
+	/*
+	 * Change file mode
+	 */
+	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
+			error = EROFS;
+		} else {
+			mode_t cur_mode = dnp->dn_mode;
+			uid_t cur_uid = dnp->dn_uid;
+			gid_t cur_gid = dnp->dn_gid;
+
+			error = vop_helper_chmod(ap->a_vp, vap->va_mode,
+			    ap->a_cred,
+			    cur_uid, cur_gid, &cur_mode);
+			if (error == 0 && cur_mode != dnp->dn_mode) {
+				error = dirfs_node_chmod(dmp, dnp, cur_mode);
+			}
+		}
+		msgno = 4;
+		goto out;
+	}
+
+	/*
+	 * Change file times
+	 */
+	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
+	    vap->va_atime.tv_nsec != VNOVAL) ||
+	    (vap->va_mtime.tv_sec != VNOVAL &&
+	    vap->va_mtime.tv_nsec != VNOVAL) )) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			error = EROFS;
+		else
+			error = dirfs_node_chtimes(dnp);
+		msgno = 5;
+		goto out;
+
+	}
+out:
+	dirfs_mount_reltoken(dmp);
+
+	KTR_LOG(dirfs_setattr, dnp, msg[msgno], error);
+
+	return error;
+}
+
+/*
+ * Flush the vnode's buffers with vfsync() and, when the node has a
+ * descriptor, fsync() it.  Returns the errno of a failed fsync(),
+ * otherwise 0.
+ */
+static int
+dirfs_fsync(struct vop_fsync_args *ap)
+{
+	dirfs_node_t dnp = VP_TO_NODE(ap->a_vp);
+	int error = 0;
+
+	debug_called();
+
+	vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
+
+	if (dnp->dn_fd != DIRFS_NOFD) {
+		/* Call fsync() once and report errno rather than its -1. */
+		if (fsync(dnp->dn_fd) == -1)
+			error = errno;
+	}
+
+	KTR_LOG(dirfs_fsync, dnp, error);
+
+	return error;
+}
+
+/*
+ * Read from a regular file through the buffer cache, one BSIZE block
+ * at a time, until the uio is satisfied or the node size is reached.
+ * Returns EINVAL for a negative offset or a non-regular vnode.
+ */
+static int
+dirfs_read(struct vop_read_args *ap)
+{
+	struct buf *bp;
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
+	dirfs_node_t dnp;
+	off_t base_offset;
+	size_t offset;
+	size_t len;
+	int error;
+
+	debug_called();
+
+	error = 0;
+	if (uio->uio_resid == 0) {
+		dbg(5, "zero len uio->uio_resid\n");
+		return error;
+	}
+
+	dnp = VP_TO_NODE(vp);
+
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	if (vp->v_type != VREG)
+		return (EINVAL);
+
+	while (uio->uio_resid > 0 && uio->uio_offset < dnp->dn_size) {
+		/*
+		 * Use buffer cache I/O (via dirfs_strategy)
+		 */
+		offset = (size_t)uio->uio_offset & BMASK;
+		base_offset = (off_t)uio->uio_offset - offset;
+		bp = getcacheblk(vp, base_offset, BSIZE, 0);
+		if (bp == NULL) {
+			lwkt_gettoken(&vp->v_mount->mnt_token);
+			error = bread(vp, base_offset, BSIZE, &bp);
+			if (error) {
+				brelse(bp);
+				lwkt_reltoken(&vp->v_mount->mnt_token);
+				dbg(5, "dirfs_read bread error %d\n", error);
+				break;
+			}
+			lwkt_reltoken(&vp->v_mount->mnt_token);
+		}
+
+		/*
+		 * Figure out how many bytes we can actually copy this loop.
+		 */
+		len = BSIZE - offset;
+		if (len > uio->uio_resid)
+			len = uio->uio_resid;
+		if (len > dnp->dn_size - uio->uio_offset)
+			len = (size_t)(dnp->dn_size - uio->uio_offset);
+
+		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
+		bqrelse(bp);
+		if (error) {
+			dbg(5, "dirfs_read uiomove error %d\n", error);
+			break;
+		}
+	}
+
+	KTR_LOG(dirfs_read, dnp, dnp->dn_size, error);
+
+	return(error);
+}
+
+/*
+ * Write to a regular file through the buffer cache.  The backing file
+ * is extended with ftruncate() up front when the write grows it.
+ * Returns EINVAL for non-regular vnodes, EFBIG (after SIGXFSZ) when
+ * RLIMIT_FSIZE would be exceeded, or the first bread()/uiomovebp()
+ * error.
+ */
+static int
+dirfs_write (struct vop_write_args *ap)
+{
+	dirfs_node_t dnp;
+	dirfs_mount_t dmp;
+	struct buf *bp;
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
+	struct thread *td = uio->uio_td;
+	int error;
+	off_t osize;
+	off_t nsize;
+	off_t base_offset;
+	size_t offset;
+	size_t len;
+	struct rlimit limit;
+
+	debug_called();
+
+	error = 0;
+	if (uio->uio_resid == 0) {
+		dbg(5, "zero-length uio->uio_resid\n");
+		return error;
+	}
+
+	dnp = VP_TO_NODE(vp);
+	dmp = VFS_TO_DIRFS(vp->v_mount);
+
+	if (vp->v_type != VREG)
+		return (EINVAL);
+
+	if (vp->v_type == VREG && td != NULL) {
+		error = kern_getrlimit(RLIMIT_FSIZE, &limit);
+		if (error != 0) {
+			dbg(5, "rlimit failure\n");
+			return error;
+		}
+		if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
+			dbg(5, "file too big\n");
+			ksignal(td->td_proc, SIGXFSZ);
+			return (EFBIG);
+		}
+	}
+
+	if (ap->a_ioflag & IO_APPEND)
+		uio->uio_offset = dnp->dn_size;
+
+	/*
+	 * buffer cache operations may be deferred, make sure
+	 * the file is correctly sized right now.
+	 */
+	osize = dnp->dn_size;
+	nsize = uio->uio_offset + uio->uio_resid;
+	if (nsize > osize && uio->uio_resid) {
+		KKASSERT(dnp->dn_fd >= 0);
+		dnp->dn_size = nsize;
+		ftruncate(dnp->dn_fd, nsize);
+		nvextendbuf(vp, osize, nsize,
+		    BSIZE, BSIZE, -1, -1, 0);
+	} /* else nsize = osize; NOT USED */
+
+	while (uio->uio_resid > 0) {
+		/*
+		 * Use buffer cache I/O (via dirfs_strategy)
+		 */
+		offset = (size_t)uio->uio_offset & BMASK;
+		base_offset = (off_t)uio->uio_offset - offset;
+		len = BSIZE - offset;
+
+		if (len > uio->uio_resid)
+			len = uio->uio_resid;
+
+		error = bread(vp, base_offset, BSIZE, &bp);
+		if (error) {
+			/*
+			 * Do not copy into a buffer that failed to load;
+			 * release it the same way dirfs_read() does.
+			 */
+			brelse(bp);
+			dbg(2, "WRITE bread failed\n");
+			break;
+		}
+		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
+		if (error) {
+			brelse(bp);
+			dbg(2, "WRITE uiomove failed\n");
+			break;
+		}
+
+//		dbg(2, "WRITE dn_size=%jd uio_offset=%jd uio_resid=%jd base_offset=%jd\n",
+//		    dnp->dn_size, uio->uio_offset, uio->uio_resid, base_offset);
+
+		if (ap->a_ioflag & IO_SYNC)
+			bwrite(bp);
+		else
+			bdwrite(bp);
+	}
+
+	KTR_LOG(dirfs_write, dnp, base_offset, uio->uio_resid,
+	    dnp->dn_size, error);
+
+	return error;
+}
+
+/* Advisory locking, delegated to lf_advlock() on the node's lock state. */
+static int
+dirfs_advlock (struct vop_advlock_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	dirfs_node_t dnp = VP_TO_NODE(vp);
+
+	debug_called();
+
+	return (lf_advlock(ap, &dnp->dn_advlock, dnp->dn_size));
+}
+
+/*
+ * Perform the I/O for a buffer with pread()/pwrite() on the node's
+ * descriptor, clipped to the node size.  Always returns 0; failures
+ * are reported through the buffer (B_ERROR, b_error) before biodone().
+ *
+ * NOTE(review): `error' holds the pread()/pwrite() byte count here,
+ * and EINVAL in the default case, where the bzero() below then starts
+ * at offset EINVAL -- confirm whether that is intended.
+ */
+static int
+dirfs_strategy(struct vop_strategy_args *ap)
+{
+	dirfs_node_t dnp;
+	dirfs_mount_t dmp;
+	struct bio *bio = ap->a_bio;
+	struct buf *bp = bio->bio_buf;
+	struct vnode *vp = ap->a_vp;
+	int error;
+	size_t iosize;
+	char *tmp;
+	char *pathfree;
+
+	debug_called();
+
+	dnp = VP_TO_NODE(vp);
+	dmp = VFS_TO_DIRFS(vp->v_mount);
+
+	error = 0;
+
+	if (vp->v_type != VREG)  {
+		dbg(5, "not VREG\n");
+		bp->b_resid = bp->b_bcount;
+		bp->b_flags |= B_ERROR | B_INVAL;
+		bp->b_error = EINVAL;
+		biodone(bio);
+		return(0);
+	}
+
+	if (dnp->dn_fd == DIRFS_NOFD) {
+		print_backtrace(-1);
+		panic("Meh, no fd to write to. dnp=%p\n", dnp);
+	}
+
+	if (bio->bio_offset + bp->b_bcount > dnp->dn_size)
+		iosize = dnp->dn_size - bio->bio_offset;
+	else
+		iosize = bp->b_bcount;
+	KKASSERT((ssize_t)iosize >= 0);
+
+	switch (bp->b_cmd) {
+	case BUF_CMD_WRITE:
+		error = pwrite(dnp->dn_fd, bp->b_data, iosize, bio->bio_offset);
+		break;
+	case BUF_CMD_READ:
+		error = pread(dnp->dn_fd, bp->b_data, iosize, bio->bio_offset);
+		break;
+	default:
+		bp->b_error = error = EINVAL;
+		bp->b_flags |= B_ERROR;
+		break;
+	}
+
+	/* Zero-fill whatever part of the buffer the transfer did not cover. */
+	if (error >= 0 && error < bp->b_bcount)
+		bzero(bp->b_data + error, bp->b_bcount - error);
+
+	if (error < 0 && errno != EINTR) {
+		dbg(5, "error=%d dnp=%p dnp->dn_fd=%d "
+		    "bio->bio_offset=%ld bcount=%d resid=%d iosize=%zd\n",
+		    errno, dnp, dnp->dn_fd, bio->bio_offset, bp->b_bcount,
+		    bp->b_resid, iosize);
+		bp->b_error = errno;
+		bp->b_resid = bp->b_bcount;
+		bp->b_flags |= B_ERROR;
+	} else {
+		tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
+		dirfs_node_stat(DIRFS_NOFD, tmp, dnp);
+		dirfs_dropfd(dmp, NULL, pathfree);
+	}
+
+	KTR_LOG(dirfs_strategy, dnp, dnp->dn_size, iosize, bp->b_cmd,
+	    bp->b_error, bp->b_resid, bio->bio_offset, error);
+
+	biodone(bio);
+
+	return 0;
+}
+
+/* Identity block mapping: the disk offset equals the logical offset. */
+static int
+dirfs_bmap(struct vop_bmap_args *ap)
+{
+	debug_called();
+
+	if (ap->a_doffsetp != NULL)
+		*ap->a_doffsetp = ap->a_loffset;
+	if (ap->a_runp != NULL)
+		*ap->a_runp = 0;
+	if (ap->a_runb != NULL)
+		*ap->a_runb = 0;
+
+	return 0;
+}
+
+/*
+ * Remove a non-directory with unlinkat().  Returns EISDIR for
+ * directories, the cache_vget() error when the vnode cannot be
+ * obtained, or the errno of a failed unlinkat().
+ */
+static int
+dirfs_nremove(struct vop_nremove_args *ap)
+{
+	dirfs_node_t dnp, pdnp;
+	dirfs_node_t pathnp;
+	dirfs_mount_t dmp;
+	struct vnode *dvp;
+	struct nchandle *nch;
+	struct namecache *ncp;
+	struct mount *mp;
+	struct vnode *vp;
+	int error;
+	char *tmp;
+	char *pathfree;
+	debug_called();
+
+	error = 0;
+	tmp = NULL;
+	vp = NULL;
+	dvp = ap->a_dvp;
+	nch = ap->a_nch;
+	ncp = nch->ncp;
+
+	mp = dvp->v_mount;
+	dmp = VFS_TO_DIRFS(mp);
+
+	lwkt_gettoken(&mp->mnt_token);
+	error = cache_vget(nch, ap->a_cred, LK_SHARED, &vp);
+	if (error) {
+		/* No vnode was returned; nothing to unlock or release. */
+		lwkt_reltoken(&mp->mnt_token);
+		return error;
+	}
+	vn_unlock(vp);
+
+	pdnp = VP_TO_NODE(dvp);
+	dnp = VP_TO_NODE(vp);
+
+	if (vp->v_type == VDIR) {
+		error = EISDIR;
+	} else {
+		pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
+		dirfs_node_lock(pdnp);
+		error = unlinkat(pathnp->dn_fd, tmp, 0);
+		if (error == 0) {
+			cache_unlink(nch);
+			dirfs_node_setpassive(dmp, dnp, 0);
+			if (dnp->dn_parent) {
+				dirfs_node_drop(dmp, dnp->dn_parent);
+				dnp->dn_parent = NULL;
+			}
+		} else {
+			error = errno;
+		}
+		dirfs_node_unlock(pdnp);
+		dirfs_dropfd(dmp, pathnp, pathfree);
+	}
+	vrele(vp);
+	lwkt_reltoken(&mp->mnt_token);
+
+	KTR_LOG(dirfs_nremove, dnp, pdnp, error);
+
+	return error;
+}
+
+/* Not supported: always returns EOPNOTSUPP. */
+static int
+dirfs_nlink(struct vop_nlink_args *ap)
+{
+	debug_called();
+
+	KTR_LOG(dirfs_unsupported, __func__);
+
+	return EOPNOTSUPP;
+}
+
+/*
+ * Rename with rename() on the two absolute paths.  Returns EXDEV when
+ * the vnodes are on different mounts, EROFS on a read-only mount, or
+ * the errno of a failed rename().  On success the node name is
+ * updated and a replaced target is treated as unlinked.
+ */
+static int
+dirfs_nrename(struct vop_nrename_args *ap)
+{
+	dirfs_node_t dnp, fdnp, tdnp;
+	dirfs_mount_t dmp;
+	struct namecache *fncp, *tncp;
+	struct vnode *fdvp, *tdvp, *vp;
+	struct mount *mp;
+	char *fpath, *fpathfree;
+	char *tpath, *tpathfree;
+	int error;
+
+	debug_called();
+
+	error = 0;
+	fdvp = ap->a_fdvp;
+	tdvp = ap->a_tdvp;
+	fncp = ap->a_fnch->ncp;
+	tncp = ap->a_tnch->ncp;
+	mp = fdvp->v_mount;
+	dmp = VFS_TO_DIRFS(mp);
+	fdnp = VP_TO_NODE(fdvp);
+	tdnp = VP_TO_NODE(tdvp);
+
+	dbg(5, "fdnp=%p tdnp=%p from=%s to=%s\n", fdnp, tdnp, fncp->nc_name,
+	    tncp->nc_name);
+
+	if (fdvp->v_mount != tdvp->v_mount)
+		return(EXDEV);
+	if (fdvp->v_mount != fncp->nc_vp->v_mount)
+		return(EXDEV);
+	if (fdvp->v_mount->mnt_flag & MNT_RDONLY)
+		return (EROFS);
+
+	tpath = dirfs_node_absolute_path_plus(dmp, tdnp,
+	    tncp->nc_name, &tpathfree);
+	fpath = dirfs_node_absolute_path_plus(dmp, fdnp,
+	    fncp->nc_name, &fpathfree);
+	error = rename(fpath, tpath);
+	if (error < 0)
+		error = errno;
+	if (error == 0) {
+		vp = fncp->nc_vp;	/* file being renamed */
+		dnp = VP_TO_NODE(vp);
+		dirfs_node_setname(dnp, tncp->nc_name, tncp->nc_nlen);
+
+		/*
+		 * We have to mark the target file that was replaced by
+		 * the rename as having been unlinked.
+		 */
+		vp = tncp->nc_vp;
+		if (vp) {
+			dbg(5, "RENAME2\n");
+			dnp = VP_TO_NODE(vp);
+			cache_unlink(ap->a_tnch);
+			dirfs_node_setpassive(dmp, dnp, 0);
+			if (dnp->dn_parent) {
+				dirfs_node_drop(dmp, dnp->dn_parent);
+				dnp->dn_parent = NULL;
+			}
+
+			/*
+			 * nlinks on directories can be a bit weird.  Zero
+			 * it out.
+			 */
+			dnp->dn_links = 0;
+			cache_inval_vp(vp, CINV_DESTROY);
+		}
+		cache_rename(ap->a_fnch, ap->a_tnch);
+	}
+	dirfs_dropfd(dmp, NULL, fpathfree);
+	dirfs_dropfd(dmp, NULL, tpathfree);
+
+	return error;
+}
+
+/*
+ * Create a directory with mkdirat(), relative to the parent's
+ * descriptor when it has one, otherwise relative to the descriptor
+ * found by dirfs_findfd().  Returns the errno of a failed mkdirat()
+ * or the dirfs_alloc_file() error; the namecache entry is bound to
+ * the new vnode only on success.
+ */
+static int
+dirfs_nmkdir(struct vop_nmkdir_args *ap)
+{
+	dirfs_mount_t dmp;
+	dirfs_node_t dnp, pdnp, dnp1;
+	struct namecache *ncp;
+	struct vattr *vap;
+	struct vnode *dvp;
+	struct vnode **vpp;
+	char *tmp, *pathfree;
+	char *path;
+	int pfd, error;
+	int extrapath;
+
+	debug_called();
+
+	extrapath = error = 0;
+	dvp = ap->a_dvp;
+	vpp = ap->a_vpp;
+	dmp = VFS_TO_DIRFS(dvp->v_mount);
+	pdnp = VP_TO_NODE(dvp);
+	ncp = ap->a_nch->ncp;
+	vap = ap->a_vap;
+	pathfree = tmp = path = NULL;
+	dnp = NULL;
+
+	dirfs_node_lock(pdnp);
+	if (pdnp->dn_fd != DIRFS_NOFD) {
+		pfd = pdnp->dn_fd;
+		path = ncp->nc_name;
+	} else {
+		dnp1 = dirfs_findfd(dmp, pdnp, &tmp, &pathfree);
+		pfd = dnp1->dn_fd;
+		/* XXX check there is room to copy the path */
+		path = kmalloc(MAXPATHLEN, M_DIRFS_MISC, M_ZERO | M_WAITOK);
+		ksnprintf(path, MAXPATHLEN, "%s/%s", tmp, ncp->nc_name);
+		extrapath = 1;
+		dirfs_dropfd(dmp, dnp1, pathfree);
+	}
+
+	error = mkdirat(pfd, path, vap->va_mode);
+	if (error) {
+		error = errno;
+	} else { /* Directory has been made */
+		/*
+		 * Keep the error dirfs_alloc_file() returns (errno may be
+		 * stale) and bind the namecache only to a valid vnode.
+		 */
+		error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, vpp,
+		    vap, O_DIRECTORY);
+		if (error == 0) {
+			cache_setunresolved(ap->a_nch);
+			cache_setvp(ap->a_nch, *vpp);
+		}
+	}
+	dirfs_node_unlock(pdnp);
+
+	if (extrapath)
+		kfree(path, M_DIRFS_MISC);
+
+	KTR_LOG(dirfs_nmkdir, pdnp, dnp, ncp->nc_name, error);
+
+	return error;
+}
+
+/*
+ * Remove a directory with rmdir() on its absolute path.  Returns
+ * ENOTDIR for non-directories, the cache_vget() error when the vnode
+ * cannot be obtained, or the errno of a failed rmdir().
+ */
+static int
+dirfs_nrmdir(struct vop_nrmdir_args *ap)
+{
+	dirfs_node_t dnp, pdnp;
+	dirfs_mount_t dmp;
+	struct vnode *dvp;
+	struct nchandle *nch;
+	struct namecache *ncp;
+	struct mount *mp;
+	struct vnode *vp;
+	int error;
+	char *tmp;
+	char *pathfree;
+
+	debug_called();
+
+	error = 0;
+	tmp = NULL;
+	vp = NULL;
+	dvp = ap->a_dvp;
+	nch = ap->a_nch;
+	ncp = nch->ncp;
+
+	mp = dvp->v_mount;
+	dmp = VFS_TO_DIRFS(mp);
+
+	lwkt_gettoken(&mp->mnt_token);
+	error = cache_vget(nch, ap->a_cred, LK_SHARED, &vp);
+	if (error) {
+		/* No vnode was returned; nothing to unlock or release. */
+		lwkt_reltoken(&mp->mnt_token);
+		return error;
+	}
+	vn_unlock(vp);
+
+	pdnp = VP_TO_NODE(dvp);
+	dnp = VP_TO_NODE(vp);
+
+	if (vp->v_type != VDIR) {
+		error = ENOTDIR;
+	} else {
+		tmp = dirfs_node_absolute_path(dmp, dnp, &pathfree);
+		dirfs_node_lock(pdnp);
+		error = rmdir(tmp);
+		if (error == 0) {
+			cache_unlink(nch);
+			dirfs_node_setpassive(dmp, dnp, 0);
+			if (dnp->dn_parent) {
+				dirfs_node_drop(dmp, dnp->dn_parent);
+				dnp->dn_parent = NULL;
+			}
+
+			/*
+			 * nlinks on directories can be a bit weird.  Zero
+			 * it out.
+			 */
+			dnp->dn_links = 0;
+			cache_inval_vp(vp, CINV_DESTROY);
+		} else {
+			error = errno;
+		}
+		dirfs_node_unlock(pdnp);
+		dirfs_dropfd(dmp, NULL, pathfree);
+	}
+	vrele(vp);
+	lwkt_reltoken(&mp->mnt_token);
+
+	KTR_LOG(dirfs_nrmdir, dnp, pdnp, error);
+
+	return error;
+}
+
+/*
+ * Create a symbolic link with symlink() at the absolute path built
+ * from the parent's path and the new name.  Returns the errno of a
+ * failed symlink() or the dirfs_alloc_file() error; the namecache
+ * entry is bound to the new vnode only on success.
+ */
+static int
+dirfs_nsymlink(struct vop_nsymlink_args *ap)
+{
+	dirfs_mount_t dmp;
+	dirfs_node_t dnp, pdnp;
+	struct mount *mp;
+	struct namecache *ncp;
+	struct vattr *vap;
+	struct vnode *dvp;
+	struct vnode **vpp;
+	char *tmp, *pathfree;
+	char *path;
+	int error;
+
+	debug_called();
+
+	error = 0;
+	dvp = ap->a_dvp;
+	vpp = ap->a_vpp;
+	mp = dvp->v_mount;
+	dmp = VFS_TO_DIRFS(dvp->v_mount);
+	pdnp = VP_TO_NODE(dvp);
+	ncp = ap->a_nch->ncp;
+	vap = ap->a_vap;
+	pathfree = tmp = path = NULL;
+	dnp = NULL;
+
+	lwkt_gettoken(&mp->mnt_token);
+	vap->va_type = VLNK;
+
+	/* Find out the whole path of our new symbolic link */
+	tmp = dirfs_node_absolute_path(dmp, pdnp, &pathfree);
+	/* XXX check there is room to copy the path */
+	path = kmalloc(MAXPATHLEN, M_DIRFS_MISC, M_ZERO | M_WAITOK);
+	ksnprintf(path, MAXPATHLEN, "%s/%s", tmp, ncp->nc_name);
+	dirfs_dropfd(dmp, NULL, pathfree);
+
+	error = symlink(ap->a_target, path);
+	if (error) {
+		error = errno;
+	} else { /* Symlink has been made */
+		/*
+		 * Keep the error dirfs_alloc_file() returns (errno may be
+		 * stale) and bind the namecache only to a valid vnode.
+		 */
+		error = dirfs_alloc_file(dmp, &dnp, pdnp, ncp, vpp,
+		    NULL, 0);
+		if (error == 0) {
+			cache_setunresolved(ap->a_nch);
+			cache_setvp(ap->a_nch, *vpp);
+		}
+	}
+	dbg(5, "path=%s a_target=%s\n", path, ap->a_target);
+
+	KTR_LOG(dirfs_nsymlink, dnp, ap->a_target, path, error);
+	kfree(path, M_DIRFS_MISC);
+	lwkt_reltoken(&mp->mnt_token);
+
+	return error;
+
+}
+
+/*
+ * Read directory entries with getdirentries() on the node's
+ * descriptor, starting at uio_offset, and hand them to the caller
+ * through vop_write_dirent().  At most 4096 bytes are fetched per
+ * call.  Returns ENOTDIR for non-directories, EINVAL for a negative
+ * resid, or the errno of a failed getdirentries().
+ */
+static int
+dirfs_readdir(struct vop_readdir_args *ap)
+{
+
+	struct dirent *dp, *dpn;
+	off_t __unused **cookies = ap->a_cookies;
+	int *ncookies = ap->a_ncookies;
+	int bytes;
+	char *buf;
+	long base;
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio;
+	dirfs_node_t dnp;
+	off_t startoff;
+	off_t cnt;
+	int error, r;
+	size_t bufsiz;
+	off_t curoff;
+
+	debug_called();
+
+	if (ncookies)
+		debug(1, "ncookies=%d\n", *ncookies);
+
+	dnp = VP_TO_NODE(vp);
+	uio = ap->a_uio;
+	startoff = uio->uio_offset;
+	cnt = 0;
+	error = 0;
+	base = 0;
+	bytes = 0;
+
+	if (vp->v_type != VDIR)
+		return ENOTDIR;
+	if (uio->uio_resid < 0)
+		return EINVAL;
+	if ((bufsiz = uio->uio_resid) > 4096)
+		bufsiz = 4096;
+	buf = kmalloc(bufsiz, M_DIRFS_MISC, M_WAITOK | M_ZERO);
+
+	/*
+	 * Generally speaking we have to be able to process ALL the
+	 * entries returned by getdirentries() in order for the seek
+	 * position to be correct.  For now try to size the buffer
+	 * to make this happen.  A smaller buffer always works.  For
+	 * now just use an appropriate size.
+	 */
+	dirfs_node_lock(dnp);
+	lseek(dnp->dn_fd, startoff, SEEK_SET);
+	bytes = getdirentries(dnp->dn_fd, buf, bufsiz, &base);
+	dbg(5, "seek %016jx %016jx %016jx\n",
+	    (intmax_t)startoff, (intmax_t)base,
+	    (intmax_t)lseek(dnp->dn_fd, 0, SEEK_CUR));
+	if (bytes < 0) {
+		if (errno == EINVAL)
+			panic("EINVAL on readdir\n");
+		error = errno;
+		curoff = startoff;
+		goto out;
+	} else if (bytes == 0) {
+		*ap->a_eofflag = 1;
+		curoff = startoff;
+		goto out;
+	}
+
+	/*
+	 * dp is advanced only in the loop increment, after the size of
+	 * the entry just emitted has been subtracted from bytes.
+	 */
+	for (dp = (struct dirent *)buf; bytes > 0 && uio->uio_resid > 0;
+	    bytes -= _DIRENT_DIRSIZ(dp), dp = dpn) {
+		r = vop_write_dirent(&error, uio, dp->d_ino, dp->d_type,
+		    dp->d_namlen, dp->d_name);
+		if (error || r)
+			break;
+		dpn = _DIRENT_NEXT(dp);
+		cnt++;
+	}
+	curoff = lseek(dnp->dn_fd, 0, SEEK_CUR);
+
+out:
+	kfree(buf, M_DIRFS_MISC);
+	uio->uio_offset = curoff;
+	dirfs_node_unlock(dnp);
+
+	KTR_LOG(dirfs_readdir, dnp, dnp->dn_fd, startoff, uio->uio_offset);
+
+	return error;
+}
+
+/*
+ * Read a symlink target with readlinkat() into a zeroed temporary
+ * buffer of uio_resid bytes and copy it out with uiomove().  Returns
+ * the errno of a failed readlinkat() or the uiomove() error.
+ */
+static int
+dirfs_readlink(struct vop_readlink_args *ap)
+{
+	dirfs_node_t dnp, pathnp;
+	dirfs_mount_t dmp;
+	struct vnode *vp;
+	struct mount *mp;
+	struct uio *uio;
+	char *tmp, *pathfree, *buf;
+	ssize_t nlen;
+	int error;
+
+	debug_called();
+
+	vp = ap->a_vp;
+
+	KKASSERT(vp->v_type == VLNK);
+
+	error = 0;
+	tmp = pathfree = NULL;
+	uio = ap->a_uio;
+	mp = vp->v_mount;
+	dmp = VFS_TO_DIRFS(mp);
+	dnp = VP_TO_NODE(vp);
+
+	lwkt_gettoken(&mp->mnt_token);
+
+	pathnp = dirfs_findfd(dmp, dnp, &tmp, &pathfree);
+
+	buf = kmalloc(uio->uio_resid, M_DIRFS_MISC, M_WAITOK | M_ZERO);
+	nlen = readlinkat(pathnp->dn_fd, dnp->dn_name, buf, uio->uio_resid);
+	if (nlen == -1 ) {
+		error = errno;
+	} else {
+		/*
+		 * The buffer was allocated with M_ZERO, so buf[nlen] is
+		 * already NUL whenever it lies inside the buffer; storing
+		 * to it would overrun when nlen == uio_resid.  The
+		 * uiomove() result is returned as is, not replaced by a
+		 * stale errno.
+		 */
+		error = uiomove(buf, nlen + 1, uio);
+	}
+	dirfs_dropfd(dmp, pathnp, pathfree);
+	kfree(buf, M_DIRFS_MISC);
+
+	lwkt_reltoken(&mp->mnt_token);
+
+	return error;
+}
+
+/*
+ * Main tasks to be performed.
+ * 1) When inode is NULL recycle the vnode
+ * 2) When the inode has 0 links:
+ *	- Check if in the TAILQ, if so remove.
+ *	- Destroy the inode.
+ *	- Recycle the vnode.
+ * 3) If none of the above, add the node to the TAILQ
+ *    when it has a valid fd and there is room on the
+ *    queue.
+ *
+ */
+static int
+dirfs_inactive(struct vop_inactive_args *ap)
+{
+	struct vnode *vp;
+	dirfs_mount_t dmp;
+	dirfs_node_t dnp;
+
+	debug_called();
+
+	vp = ap->a_vp;
+	dmp = VFS_TO_DIRFS(vp->v_mount);
+	dnp = VP_TO_NODE(vp);
+
+	/* Degenerate case */
+	if (dnp == NULL) {
+		dbg(5, "dnp was NULL\n");
+		vrecycle(vp);
+		return 0;
+	}
+
+	dirfs_mount_gettoken(dmp);
+
+	/*
+	 * Deal with the case the inode has 0 links which means it was unlinked.
+	 */
+	if (dnp->dn_links == 0) {
+		vrecycle(vp);
+		dbg(5, "recycled a vnode of an unlinked dnp\n");
+
+		goto out;
+	}
+
+	/*
+	 * Try to retain the fd in our fd cache.
+	 */
+	dirfs_node_setpassive(dmp, dnp, 1);
+out:
+	dirfs_mount_reltoken(dmp);
+
+	return 0;
+
+}
+
+/* Detach the vnode from its node through dirfs_free_vp(). */
+int
+dirfs_reclaim(struct vop_reclaim_args *ap)
+{
+	struct vnode *vp;
+	dirfs_node_t dnp;
+	dirfs_mount_t dmp;
+
+	debug_called();
+
+	vp = ap->a_vp;
+	dnp = VP_TO_NODE(vp);
+	dmp = VFS_TO_DIRFS(vp->v_mount);
+
+	dirfs_free_vp(dmp, dnp);
+	/* dnp is now invalid, may have been destroyed */
+
+	return 0;
+}
+
+/* Not supported: always returns EOPNOTSUPP. */
+static int
+dirfs_mountctl(struct vop_mountctl_args *ap)
+{
+	debug_called();
+
+	KTR_LOG(dirfs_unsupported, __func__);
+
+	return EOPNOTSUPP;
+}
+
+/* Not supported: always returns EOPNOTSUPP. */
+static int
+dirfs_print(struct vop_print_args *v)
+{
+	debug_called();
+
+	KTR_LOG(dirfs_unsupported, __func__);
+
+	return EOPNOTSUPP;
+}
+
+/* Unused; the ops table below installs vop_stdpathconf instead. */
+static int __unused
+dirfs_pathconf(struct vop_pathconf_args *v)
+{
+	debug_called();
+
+	return EOPNOTSUPP;
+}
+
+/* Not supported: always returns EOPNOTSUPP. */
+static int
+dirfs_kqfilter (struct vop_kqfilter_args *ap)
+{
+	debug_called();
+
+	KTR_LOG(dirfs_unsupported, __func__);
+
+	return EOPNOTSUPP;
+}
+
+/* Vnode operations vector for dirfs. */
+struct vop_ops dirfs_vnode_vops = {
+	.vop_default =			vop_defaultop,
+	.vop_nwhiteout =		vop_compat_nwhiteout,
+	.vop_ncreate =			dirfs_ncreate,
+	.vop_nresolve =			dirfs_nresolve,
+	.vop_markatime =		vop_stdmarkatime,
+	.vop_nlookupdotdot =		dirfs_nlookupdotdot,
+	.vop_nmknod =			dirfs_nmknod,
+	.vop_open =			dirfs_open,
+	.vop_close =			dirfs_close,
+	.vop_access =			dirfs_access,
+	.vop_getattr =			dirfs_getattr,
+	.vop_setattr =			dirfs_setattr,
+	.vop_read =			dirfs_read,
+	.vop_write =			dirfs_write,
+	.vop_fsync =			dirfs_fsync,
+	.vop_mountctl =			dirfs_mountctl,
+	.vop_nremove =			dirfs_nremove,
+	.vop_nlink =			dirfs_nlink,
+	.vop_nrename =			dirfs_nrename,
+	.vop_nmkdir =			dirfs_nmkdir,
+	.vop_nrmdir =			dirfs_nrmdir,
+	.vop_nsymlink =			dirfs_nsymlink,
+	.vop_readdir =			dirfs_readdir,
+	.vop_readlink =			dirfs_readlink,
+	.vop_inactive =			dirfs_inactive,
+	.vop_reclaim =			dirfs_reclaim,
+	.vop_print =			dirfs_print,
+	.vop_pathconf =			vop_stdpathconf,
+	.vop_bmap =			dirfs_bmap,
+	.vop_strategy =			dirfs_strategy,
+	.vop_advlock =			dirfs_advlock,
+	.vop_kqfilter =			dirfs_kqfilter,
+	.vop_getpages =			vop_stdgetpages,
+	.vop_putpages =			vop_stdputpages
+};
-- 2.41.0 From a6a0267c240cf17db2c38baf1865d913fa8e30d6 Mon Sep 17 00:00:00 2001 From: Antonio Huete Jimenez Date: Thu, 5 Sep 2013 06:58:27 -0700 Subject: [PATCH 16/16] test/vkernel - Fix build. --- test/vkernel/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/test/vkernel/Makefile b/test/vkernel/Makefile index 907457a60c..8e75350c67 100644 --- a/test/vkernel/Makefile +++ b/test/vkernel/Makefile @@ -3,7 +3,6 @@ # require it to be specified SRCDIR ?= ${.CURDIR}/../.. -ROOTSIZE ?= PHYSMEM ?= 256m NCPUS ?= 2 FSTYPE ?= -- 2.41.0