From 3f5b8b3b1546a3803bad2d997deefa0f4e30549d Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 5 Nov 2013 22:49:44 -0800 Subject: [PATCH 01/16] hammer2 - performance, stabilization * Add hysteresis to anything calling H2's strategy functions. Because the BIOs are queued to a thread and compression might be required, it is possible for buffer flushes to queue thousands of BIOs to the thread all at once. This can result in thousands of locked BUFs which then stall frontend code. Stall strategy calls (typically the buffer flush code) after queueing a BIO when the number of pending file BIOs exceeds vfs.hammer2.flush_pipe, whose value defaults to 100. Hysteresis is set at 2/3rds the value so a maximum efficiency pipeline is maintained. * Do not try to update blockrefs in an inode when the inode is flagged DIRECTDATA. This case can occur when a hardlink is shifted up to a higher directory. The original inode is converted into an OBJTYPE_HARDLINK object which has no file data. Fixes a panic. 
--- sys/vfs/hammer2/hammer2.h | 8 +++++ sys/vfs/hammer2/hammer2_flush.c | 34 ++++++-------------- sys/vfs/hammer2/hammer2_vfsops.c | 53 ++++++++++++++++++++++++++++++++ sys/vfs/hammer2/hammer2_vnops.c | 5 ++- 4 files changed, 75 insertions(+), 25 deletions(-) diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h index 35c9b46a7b..599b82b3e8 100644 --- a/sys/vfs/hammer2/hammer2.h +++ b/sys/vfs/hammer2/hammer2.h @@ -562,6 +562,7 @@ struct hammer2_pfsmount { long inmem_inodes; long inmem_chains; int inmem_waiting; + int count_lwinprog; /* logical write in prog */ thread_t wthread_td; /* write thread td */ struct bio_queue_head wthread_bioq; /* logical buffer bioq */ struct mtx wthread_mtx; /* interlock */ @@ -570,6 +571,9 @@ struct hammer2_pfsmount { typedef struct hammer2_pfsmount hammer2_pfsmount_t; +#define HAMMER2_LWINPROG_WAITING 0x80000000 +#define HAMMER2_LWINPROG_MASK 0x7FFFFFFF + #if defined(_KERNEL) MALLOC_DECLARE(M_HAMMER2); @@ -623,6 +627,7 @@ extern struct vop_ops hammer2_fifo_vops; extern int hammer2_debug; extern int hammer2_cluster_enable; extern int hammer2_hardlink_enable; +extern int hammer2_flush_pipe; extern long hammer2_iod_file_read; extern long hammer2_iod_meta_read; extern long hammer2_iod_indr_read; @@ -870,6 +875,9 @@ void hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp); void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp); void hammer2_bioq_sync(hammer2_pfsmount_t *pmp); int hammer2_vfs_sync(struct mount *mp, int waitflags); +void hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp); +void hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp); +void hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp); /* * hammer2_freemap.c diff --git a/sys/vfs/hammer2/hammer2_flush.c b/sys/vfs/hammer2/hammer2_flush.c index 4d33f197c7..de2185a75e 100644 --- a/sys/vfs/hammer2/hammer2_flush.c +++ b/sys/vfs/hammer2/hammer2_flush.c @@ -1249,19 +1249,6 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) #if FLUSH_DEBUG 
kprintf("SCAN2 %p.%d %08x mod=%016jx del=%016jx trans=%016jx\n", child, child->bref.type, child->flags, child->modify_tid, child->delete_tid, info->trans->sync_tid); #endif - /* - * Inodes with stale children that have been converted to DIRECTDATA - * mode (file extension or hardlink conversion typically) need to - * skipped right now before we start messing with a non-existant - * block table. - */ -#if 0 - if (parent->bref.type == HAMMER2_BREF_TYPE_INODE && - (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA)) { - goto finalize; - } -#endif - /* * Ignore children created after our flush point, treating them as * if they did not exist). These children will not cause the parent @@ -1343,20 +1330,19 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) switch(parent->bref.type) { case HAMMER2_BREF_TYPE_INODE: /* - * XXX Should assert that OPFLAG_DIRECTDATA is 0 once we - * properly duplicate the inode headers and do proper flush - * range checks (all the children should be beyond the flush - * point). For now just don't sync the non-applicable - * children. - * - * XXX Can also occur due to hardlink consolidation. We - * set OPFLAG_DIRECTDATA to prevent the indirect and data - * blocks from syncing ot the hardlink pointer. + * Access the inode's block array. However, there is no + * block array if the inode is flagged DIRECTDATA. The + * DIRECTDATA case typically only occurs when a hardlink has + * been shifted up the tree and the original inode gets + * replaced with an OBJTYPE_HARDLINK placeholding inode. 
*/ - if (parent->data) + if (parent->data && + (parent->data->ipdata.op_flags & + HAMMER2_OPFLAG_DIRECTDATA) == 0) { base = &parent->data->ipdata.u.blockset.blockref[0]; - else + } else { base = NULL; + } count = HAMMER2_SET_COUNT; break; case HAMMER2_BREF_TYPE_INDIRECT: diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c index b13bf93f90..1c723e2eff 100644 --- a/sys/vfs/hammer2/hammer2_vfsops.c +++ b/sys/vfs/hammer2/hammer2_vfsops.c @@ -81,6 +81,7 @@ static struct lock hammer2_mntlk; int hammer2_debug; int hammer2_cluster_enable = 1; int hammer2_hardlink_enable = 1; +int hammer2_flush_pipe = 100; long hammer2_iod_file_read; long hammer2_iod_meta_read; long hammer2_iod_indr_read; @@ -116,6 +117,8 @@ SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW, &hammer2_cluster_enable, 0, ""); SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW, &hammer2_hardlink_enable, 0, ""); +SYSCTL_INT(_vfs_hammer2, OID_AUTO, flush_pipe, CTLFLAG_RW, + &hammer2_flush_pipe, 0, ""); SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW, &hammer2_iod_file_read, 0, ""); @@ -746,6 +749,8 @@ hammer2_write_thread(void *arg) * else normal bio processing */ mtx_unlock(&pmp->wthread_mtx); + + hammer2_lwinprog_drop(pmp); error = 0; bp = bio->bio_buf; @@ -2154,6 +2159,54 @@ hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index) } } +/* + * This handles hysteresis on regular file flushes. Because the BIOs are + * routed to a thread it is possible for an excessive number to build up + * and cause long front-end stalls long before the runningbuffspace limit + * is hit, so we implement hammer2_flush_pipe to control the + * hysteresis. + * + * This is a particular problem when compression is used. 
+ */ +void +hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp) +{ + atomic_add_int(&pmp->count_lwinprog, 1); +} + +void +hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp) +{ + int lwinprog; + + lwinprog = atomic_fetchadd_int(&pmp->count_lwinprog, -1); + if ((lwinprog & HAMMER2_LWINPROG_WAITING) && + (lwinprog & HAMMER2_LWINPROG_MASK) <= hammer2_flush_pipe * 2 / 3) { + atomic_clear_int(&pmp->count_lwinprog, + HAMMER2_LWINPROG_WAITING); + wakeup(&pmp->count_lwinprog); + } +} + +void +hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp) +{ + int lwinprog; + + for (;;) { + lwinprog = pmp->count_lwinprog; + cpu_ccfence(); + if ((lwinprog & HAMMER2_LWINPROG_MASK) < hammer2_flush_pipe) + break; + tsleep_interlock(&pmp->count_lwinprog, 0); + atomic_set_int(&pmp->count_lwinprog, HAMMER2_LWINPROG_WAITING); + lwinprog = pmp->count_lwinprog; + if ((lwinprog & HAMMER2_LWINPROG_MASK) < hammer2_flush_pipe) + break; + tsleep(&pmp->count_lwinprog, PINTERLOCKED, "h2wpipe", hz); + } +} + void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp) { diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c index 0c91406dc3..abba4e34eb 100644 --- a/sys/vfs/hammer2/hammer2_vnops.c +++ b/sys/vfs/hammer2/hammer2_vnops.c @@ -2117,14 +2117,17 @@ hammer2_strategy_write(struct vop_strategy_args *ap) ip = VTOI(ap->a_vp); pmp = ip->pmp; + hammer2_lwinprog_ref(pmp); mtx_lock(&pmp->wthread_mtx); if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) { bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio); + mtx_unlock(&pmp->wthread_mtx); wakeup(&pmp->wthread_bioq); } else { bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio); + mtx_unlock(&pmp->wthread_mtx); } - mtx_unlock(&pmp->wthread_mtx); + hammer2_lwinprog_wait(pmp); return(0); } -- 2.41.0 From ed2fcb49bc9ec16d72f277779516b2dbc8178ebd Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 6 Nov 2013 10:31:04 +0100 Subject: [PATCH 02/16] libwrap: Make it build with -std=gnu99. 
--- contrib/tcp_wrappers/clean_exit.c | 1 + contrib/tcp_wrappers/hosts_access.c | 3 ++- contrib/tcp_wrappers/options.c | 1 + contrib/tcp_wrappers/percent_x.c | 1 + contrib/tcp_wrappers/rfc931.c | 2 +- contrib/tcp_wrappers/shell_cmd.c | 2 ++ contrib/tcp_wrappers/update.c | 2 +- lib/libwrap/Makefile | 1 - 8 files changed, 9 insertions(+), 4 deletions(-) diff --git a/contrib/tcp_wrappers/clean_exit.c b/contrib/tcp_wrappers/clean_exit.c index cb9d4f5080..41caaf0306 100644 --- a/contrib/tcp_wrappers/clean_exit.c +++ b/contrib/tcp_wrappers/clean_exit.c @@ -13,6 +13,7 @@ static char sccsid[] = "@(#) clean_exit.c 1.4 94/12/28 17:42:19"; #endif #include +#include extern void exit(); diff --git a/contrib/tcp_wrappers/hosts_access.c b/contrib/tcp_wrappers/hosts_access.c index e7b5d942c1..4946ed790c 100644 --- a/contrib/tcp_wrappers/hosts_access.c +++ b/contrib/tcp_wrappers/hosts_access.c @@ -17,7 +17,6 @@ * Author: Wietse Venema, Eindhoven University of Technology, The Netherlands. * * $FreeBSD: src/contrib/tcp_wrappers/hosts_access.c,v 1.3.2.1 2000/07/18 08:34:54 ume Exp $ - * $DragonFly: src/contrib/tcp_wrappers/hosts_access.c,v 1.3 2005/04/29 00:37:08 joerg Exp $ */ #ifndef lint @@ -45,6 +44,8 @@ static char sccsid[] = "@(#) hosts_access.c 1.21 97/02/12 02:13:22"; #ifdef INET6 #include #endif +#include +#include extern char *fgets(); diff --git a/contrib/tcp_wrappers/options.c b/contrib/tcp_wrappers/options.c index bcb26b4da5..d30d5d5e80 100644 --- a/contrib/tcp_wrappers/options.c +++ b/contrib/tcp_wrappers/options.c @@ -49,6 +49,7 @@ static char sccsid[] = "@(#) options.c 1.17 96/02/11 17:01:31"; #include #include #include +#include #ifndef MAXPATHNAMELEN #define MAXPATHNAMELEN BUFSIZ diff --git a/contrib/tcp_wrappers/percent_x.c b/contrib/tcp_wrappers/percent_x.c index c95a1ea414..9b37329cf3 100644 --- a/contrib/tcp_wrappers/percent_x.c +++ b/contrib/tcp_wrappers/percent_x.c @@ -19,6 +19,7 @@ static char sccsid[] = "@(#) percent_x.c 1.4 94/12/28 17:42:37"; #include 
#include #include +#include extern void exit(); diff --git a/contrib/tcp_wrappers/rfc931.c b/contrib/tcp_wrappers/rfc931.c index faacd48969..9a8fc52ec9 100644 --- a/contrib/tcp_wrappers/rfc931.c +++ b/contrib/tcp_wrappers/rfc931.c @@ -9,7 +9,6 @@ * Author: Wietse Venema, Eindhoven University of Technology, The Netherlands. * * $FreeBSD: src/contrib/tcp_wrappers/rfc931.c,v 1.2.2.1 2000/07/18 16:41:11 dwmalone Exp $ - * $DragonFly: src/contrib/tcp_wrappers/rfc931.c,v 1.2 2003/06/17 04:24:06 dillon Exp $ */ #ifndef lint @@ -26,6 +25,7 @@ static char sccsid[] = "@(#) rfc931.c 1.10 95/01/02 16:11:34"; #include #include #include +#include #ifndef SEEK_SET #define SEEK_SET 0 diff --git a/contrib/tcp_wrappers/shell_cmd.c b/contrib/tcp_wrappers/shell_cmd.c index dc46f925b0..7667e13301 100644 --- a/contrib/tcp_wrappers/shell_cmd.c +++ b/contrib/tcp_wrappers/shell_cmd.c @@ -16,11 +16,13 @@ static char sccsid[] = "@(#) shell_cmd.c 1.5 94/12/28 17:42:44"; #include #include +#include #include #include #include #include #include +#include extern void exit(); diff --git a/contrib/tcp_wrappers/update.c b/contrib/tcp_wrappers/update.c index b31c53976d..ec8870922c 100644 --- a/contrib/tcp_wrappers/update.c +++ b/contrib/tcp_wrappers/update.c @@ -13,7 +13,6 @@ * Author: Wietse Venema, Eindhoven University of Technology, The Netherlands. * * $FreeBSD: src/contrib/tcp_wrappers/update.c,v 1.2 2000/02/03 10:27:00 shin Exp $ - * $DragonFly: src/contrib/tcp_wrappers/update.c,v 1.2 2003/06/17 04:24:06 dillon Exp $ */ #ifndef lint @@ -25,6 +24,7 @@ static char sccsid[] = "@(#) update.c 1.1 94/12/28 17:42:56"; #include #include #include +#include /* Local stuff. 
*/ diff --git a/lib/libwrap/Makefile b/lib/libwrap/Makefile index 1b645a1747..ccd7b3e738 100644 --- a/lib/libwrap/Makefile +++ b/lib/libwrap/Makefile @@ -6,7 +6,6 @@ SHLIB_MAJOR= 4 INCS= tcpd.h MAN= hosts_access.3 MAN+= hosts_access.5 hosts_options.5 -CSTD?= gnu89 WARNS?= 1 .PATH: ${.CURDIR}/../../contrib/tcp_wrappers -- 2.41.0 From 790a83e5ad3c64adb2f406bdcc9354c87a7c3e26 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 6 Nov 2013 13:44:04 +0100 Subject: [PATCH 03/16] ipfw(8): Add missing header for _long_to_time(). --- sbin/ipfw/ipfw2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index b4b4324979..b4b07300bd 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include -- 2.41.0 From b0eeb746d431b8d19732530118a15612fe880b26 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 6 Nov 2013 14:37:37 +0100 Subject: [PATCH 04/16] Adjust the usual files for 3.7 on master. --- gnu/usr.bin/groff/tmac/mdoc.local | 3 ++- sys/conf/newvers.sh | 2 +- sys/sys/param.h | 4 +++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/gnu/usr.bin/groff/tmac/mdoc.local b/gnu/usr.bin/groff/tmac/mdoc.local index b257288614..559d771994 100644 --- a/gnu/usr.bin/groff/tmac/mdoc.local +++ b/gnu/usr.bin/groff/tmac/mdoc.local @@ -69,7 +69,7 @@ . . .\" Default .Os value -.ds doc-default-operating-system DragonFly\~3.5 +.ds doc-default-operating-system DragonFly\~3.7 . . .\" DragonFly releases not found in doc-common @@ -90,6 +90,7 @@ .ds doc-operating-system-DragonFly-3.1 3.1 .ds doc-operating-system-DragonFly-3.3 3.3 .ds doc-operating-system-DragonFly-3.5 3.5 +.ds doc-operating-system-DragonFly-3.7 3.7 . .\" FreeBSD releases not found in doc-common. 
.ds doc-operating-system-FreeBSD-7.2 7.2 diff --git a/sys/conf/newvers.sh b/sys/conf/newvers.sh index 9f683f7234..bcb731b1cf 100644 --- a/sys/conf/newvers.sh +++ b/sys/conf/newvers.sh @@ -44,7 +44,7 @@ fi # Set the branch # -BRANCH="DEVELOPMENT_3_5" +BRANCH="DEVELOPMENT_3_7" TYPE="DragonFly" diff --git a/sys/sys/param.h b/sys/sys/param.h index cc3df42c43..81034a26af 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -102,9 +102,11 @@ * 300501 - Convert libm to FreeBSD's version * 300502 - GEM and i915 KMS support in kernel * 300503 - Upgrade libiconv, locales, and associated libc functions + * 300600 - 3.6 release + * 300700 - 3.5 master */ #undef __DragonFly_version -#define __DragonFly_version 300503 /* propagated to newvers */ +#define __DragonFly_version 300700 /* propagated to newvers */ #include -- 2.41.0 From 1cd61a7c742213b63eca69f8cd2e52ff83792398 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Wed, 6 Nov 2013 23:00:21 +0800 Subject: [PATCH 05/16] mxge: Properly setup RSS key Thank folks at Myricom very much for sending me information about the firmware RSS key length. 
--- sys/dev/netif/mxge/if_mxge.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/sys/dev/netif/mxge/if_mxge.c b/sys/dev/netif/mxge/if_mxge.c index b74f98c370..1aa0392f7d 100644 --- a/sys/dev/netif/mxge/if_mxge.c +++ b/sys/dev/netif/mxge/if_mxge.c @@ -60,6 +60,7 @@ $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $ #include #include #include +#include #include #include @@ -85,6 +86,7 @@ $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $ #include #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD) +#define MXGE_HWRSS_KEYLEN 16 /* Tunable params */ static int mxge_nvidia_ecrc_enable = 1; @@ -3361,6 +3363,33 @@ mxge_open(mxge_softc_t *sc) for (i = 0; i < sc->num_slices; i++) itable[i] = (uint8_t)i; + if (sc->use_rss) { + volatile uint8_t *hwkey; + uint8_t swkey[MXGE_HWRSS_KEYLEN]; + + err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET, + &cmd); + if (err != 0) { + if_printf(ifp, "failed to get rsskey\n"); + return err; + } + hwkey = sc->sram + cmd.data0; + + toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN); + for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i) + hwkey[i] = swkey[i]; + wmb(); + + err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED, + &cmd); + if (err != 0) { + if_printf(ifp, "failed to update rsskey\n"); + return err; + } + if (bootverbose) + if_printf(ifp, "RSS key updated\n"); + } + cmd.data0 = 1; if (sc->use_rss) { if (bootverbose) -- 2.41.0 From 4badba3841ae9f4d60211d1c5ed006e17b38c299 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 6 Nov 2013 18:50:49 +0100 Subject: [PATCH 06/16] tcpd{chk,match}(8): Fix compilation with -std=gnu99. 
--- contrib/tcp_wrappers/fakelog.c | 4 ++++ contrib/tcp_wrappers/inetcf.c | 2 ++ contrib/tcp_wrappers/scaffold.c | 2 +- contrib/tcp_wrappers/tcpdchk.c | 3 ++- contrib/tcp_wrappers/tcpdmatch.c | 2 +- usr.sbin/tcpdchk/Makefile | 4 +--- usr.sbin/tcpdmatch/Makefile | 4 +--- 7 files changed, 12 insertions(+), 9 deletions(-) diff --git a/contrib/tcp_wrappers/fakelog.c b/contrib/tcp_wrappers/fakelog.c index fa9e06e1a2..a8727c610f 100644 --- a/contrib/tcp_wrappers/fakelog.c +++ b/contrib/tcp_wrappers/fakelog.c @@ -17,6 +17,7 @@ static char sccsid[] = "@(#) fakelog.c 1.3 94/12/28 17:42:21"; /* ARGSUSED */ +void openlog(name, logopt, facility) char *name; int logopt; @@ -27,6 +28,7 @@ int facility; /* vsyslog - format one record */ +void vsyslog(severity, fmt, ap) int severity; char *fmt; @@ -43,6 +45,7 @@ va_list ap; /* VARARGS */ +void VARARGS(syslog, int, severity) { va_list ap; @@ -56,6 +59,7 @@ VARARGS(syslog, int, severity) /* closelog - dummy */ +void closelog() { /* void */ diff --git a/contrib/tcp_wrappers/inetcf.c b/contrib/tcp_wrappers/inetcf.c index 60c1328ed3..e89d89058c 100644 --- a/contrib/tcp_wrappers/inetcf.c +++ b/contrib/tcp_wrappers/inetcf.c @@ -14,11 +14,13 @@ static char sccsid[] = "@(#) inetcf.c 1.7 97/02/12 02:13:23"; #include #include #include +#include extern void exit(); #include "tcpd.h" #include "inetcf.h" +#include "scaffold.h" /* * Network configuration files may live in unusual places. Here are some diff --git a/contrib/tcp_wrappers/scaffold.c b/contrib/tcp_wrappers/scaffold.c index 76eec0cb12..387bcbf10d 100644 --- a/contrib/tcp_wrappers/scaffold.c +++ b/contrib/tcp_wrappers/scaffold.c @@ -4,7 +4,6 @@ * Author: Wietse Venema, Eindhoven University of Technology, The Netherlands. 
* * $FreeBSD: src/contrib/tcp_wrappers/scaffold.c,v 1.2.2.1 2000/07/18 08:34:55 ume Exp $ - * $DragonFly: src/contrib/tcp_wrappers/scaffold.c,v 1.3 2005/09/04 01:53:07 sephe Exp $ */ #ifndef lint @@ -23,6 +22,7 @@ static char sccs_id[] = "@(#) scaffold.c 1.6 97/03/21 19:27:24"; #include #include #include +#include #ifndef INADDR_NONE #define INADDR_NONE (-1) /* XXX should be 0xffffffff */ diff --git a/contrib/tcp_wrappers/tcpdchk.c b/contrib/tcp_wrappers/tcpdchk.c index ac06cd00b4..c1b8e4f2aa 100644 --- a/contrib/tcp_wrappers/tcpdchk.c +++ b/contrib/tcp_wrappers/tcpdchk.c @@ -14,7 +14,6 @@ * Author: Wietse Venema, Eindhoven University of Technology, The Netherlands. * * $FreeBSD: src/contrib/tcp_wrappers/tcpdchk.c,v 1.3.2.1 2000/07/18 08:34:55 ume Exp $ - * $DragonFly: src/contrib/tcp_wrappers/tcpdchk.c,v 1.3 2005/04/29 01:00:27 joerg Exp $ */ #ifndef lint @@ -36,6 +35,8 @@ static char sccsid[] = "@(#) tcpdchk.c 1.8 97/02/12 02:13:25"; #include #include #include +#include +#include extern void exit(); extern int optind; diff --git a/contrib/tcp_wrappers/tcpdmatch.c b/contrib/tcp_wrappers/tcpdmatch.c index 047e2b93d1..1e073c38ce 100644 --- a/contrib/tcp_wrappers/tcpdmatch.c +++ b/contrib/tcp_wrappers/tcpdmatch.c @@ -13,7 +13,6 @@ * Author: Wietse Venema, Eindhoven University of Technology, The Netherlands. 
* * $FreeBSD: src/contrib/tcp_wrappers/tcpdmatch.c,v 1.2.2.1 2000/07/18 08:34:55 ume Exp $ - * $DragonFly: src/contrib/tcp_wrappers/tcpdmatch.c,v 1.2 2003/06/17 04:24:06 dillon Exp $ */ #ifndef lint @@ -32,6 +31,7 @@ static char sccsid[] = "@(#) tcpdmatch.c 1.5 96/02/11 17:01:36"; #include #include #include +#include extern void exit(); extern int optind; diff --git a/usr.sbin/tcpdchk/Makefile b/usr.sbin/tcpdchk/Makefile index 696630f57d..98afb1459e 100644 --- a/usr.sbin/tcpdchk/Makefile +++ b/usr.sbin/tcpdchk/Makefile @@ -1,13 +1,11 @@ # # $FreeBSD: src/usr.sbin/tcpdchk/Makefile,v 1.3.2.2 2001/04/25 12:11:00 ru Exp $ -# $DragonFly: src/usr.sbin/tcpdchk/Makefile,v 1.3 2004/01/31 06:56:46 dillon Exp $ # PROG= tcpdchk MAN= tcpdchk.8 SRCS= tcpdchk.c fakelog.c inetcf.c scaffold.c -CSTD?= gnu89 -WARNS?= 0 +WARNS?= 1 CFLAGS= -DREAL_DAEMON_DIR=\"/usr/libexec\" \ -DSEVERITY=LOG_INFO -DRFC931_TIMEOUT=10 \ diff --git a/usr.sbin/tcpdmatch/Makefile b/usr.sbin/tcpdmatch/Makefile index 6a552b3ecf..e40262ae24 100644 --- a/usr.sbin/tcpdmatch/Makefile +++ b/usr.sbin/tcpdmatch/Makefile @@ -1,13 +1,11 @@ # # $FreeBSD: src/usr.sbin/tcpdmatch/Makefile,v 1.2.2.2 2001/04/25 12:11:01 ru Exp $ -# $DragonFly: src/usr.sbin/tcpdmatch/Makefile,v 1.3 2004/01/31 06:56:46 dillon Exp $ # PROG= tcpdmatch MAN= tcpdmatch.8 SRCS= tcpdmatch.c fakelog.c inetcf.c scaffold.c -CSTD?= gnu89 -WARNS?= 0 +WARNS?= 1 CFLAGS= -DREAL_DAEMON_DIR=\"/usr/libexec\" \ -DSEVERITY=LOG_INFO -DRFC931_TIMEOUT=10 -- 2.41.0 From 144a4253e42fa51d458e2ca47492b3e134ebbe36 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 6 Nov 2013 19:09:07 +0100 Subject: [PATCH 07/16] libsmb: Make it compile with -std=gnu99. 
--- contrib/smbfs/lib/smb/mbuf.c | 1 + contrib/smbfs/lib/smb/rq.c | 2 +- lib/libsmb/Makefile | 3 +-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/smbfs/lib/smb/mbuf.c b/contrib/smbfs/lib/smb/mbuf.c index 93dd9bfca9..2193d50862 100644 --- a/contrib/smbfs/lib/smb/mbuf.c +++ b/contrib/smbfs/lib/smb/mbuf.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include diff --git a/contrib/smbfs/lib/smb/rq.c b/contrib/smbfs/lib/smb/rq.c index 7268e58422..d70ab6e6be 100644 --- a/contrib/smbfs/lib/smb/rq.c +++ b/contrib/smbfs/lib/smb/rq.c @@ -163,7 +163,7 @@ smb_t2_request(struct smb_ctx *ctx, int setup, int setupcount, bzero(&krq, sizeof(krq)); krq.ioc_setup[0] = setup; krq.ioc_setupcnt = setupcount; - krq.ioc_name = name; + krq.ioc_name = (char *)name; krq.ioc_tparamcnt = tparamcnt; krq.ioc_tparam = tparam; krq.ioc_tdatacnt = tdatacnt; diff --git a/lib/libsmb/Makefile b/lib/libsmb/Makefile index 5f475dab52..9566998b97 100644 --- a/lib/libsmb/Makefile +++ b/lib/libsmb/Makefile @@ -2,8 +2,7 @@ LIB= smb -CSTD?= gnu89 -WARNS?= 0 +WARNS?= 1 SHLIB_MAJOR= 2 -- 2.41.0 From d37f1b57dcde8b5327e5dbf772d1103f3f57774c Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 6 Nov 2013 19:11:00 +0100 Subject: [PATCH 08/16] tcpd(8): Make it compile with -std=gnu99. --- contrib/tcp_wrappers/tcpd.c | 3 ++- libexec/tcpd/Makefile | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/tcp_wrappers/tcpd.c b/contrib/tcp_wrappers/tcpd.c index 6e9049f0c4..ab367c216a 100644 --- a/contrib/tcp_wrappers/tcpd.c +++ b/contrib/tcp_wrappers/tcpd.c @@ -10,7 +10,6 @@ * Author: Wietse Venema, Eindhoven University of Technology, The Netherlands. 
* * $FreeBSD: src/contrib/tcp_wrappers/tcpd.c,v 1.2 2000/02/03 10:26:59 shin Exp $ - * $DragonFly: src/contrib/tcp_wrappers/tcpd.c,v 1.2 2003/06/17 04:24:06 dillon Exp $ */ #ifndef lint @@ -27,6 +26,7 @@ static char sccsid[] = "@(#) tcpd.c 1.10 96/02/11 17:01:32"; #include #include #include +#include #ifndef MAXPATHNAMELEN #define MAXPATHNAMELEN BUFSIZ @@ -44,6 +44,7 @@ static char sccsid[] = "@(#) tcpd.c 1.10 96/02/11 17:01:32"; int allow_severity = SEVERITY; /* run-time adjustable */ int deny_severity = LOG_WARNING; /* ditto */ +int main(argc, argv) int argc; char **argv; diff --git a/libexec/tcpd/Makefile b/libexec/tcpd/Makefile index f36909c156..1306303539 100644 --- a/libexec/tcpd/Makefile +++ b/libexec/tcpd/Makefile @@ -2,7 +2,6 @@ PROG= tcpd MAN= tcpd.8 -CSTD?= gnu89 WARNS?= 1 CFLAGS+=-DREAL_DAEMON_DIR=\"/usr/libexec\" \ -- 2.41.0 From f0dce136d16aa094ec35c59c4a441be374c2d317 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 6 Nov 2013 19:25:46 +0100 Subject: [PATCH 09/16] : Fix comment. --- sys/sys/param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/sys/param.h b/sys/sys/param.h index 81034a26af..148f171f4a 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -103,7 +103,7 @@ * 300502 - GEM and i915 KMS support in kernel * 300503 - Upgrade libiconv, locales, and associated libc functions * 300600 - 3.6 release - * 300700 - 3.5 master + * 300700 - 3.7 master */ #undef __DragonFly_version #define __DragonFly_version 300700 /* propagated to newvers */ -- 2.41.0 From 2a8b1c4073b97c6322df5ca8a78bd80759193706 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 6 Nov 2013 11:27:31 -0800 Subject: [PATCH 10/16] hammer2 - Stabilization * Fix bugs in hammer2_chain_insert(). Chain->inlayer was not being properly set in all cases. Also, the core->chain_count was not tracking properly and could lead to premature removal or even prevent removal. 
* Fix a double-unlock bug on oparent in hammer2_chain_getparent() which could occur when the function races a duplication. --- sys/vfs/hammer2/hammer2_chain.c | 47 ++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c index 1616f8faf1..93025f100e 100644 --- a/sys/vfs/hammer2/hammer2_chain.c +++ b/sys/vfs/hammer2/hammer2_chain.c @@ -392,7 +392,7 @@ hammer2_chain_insert(hammer2_chain_core_t *above, hammer2_chain_layer_t *layer, nlayer = TAILQ_PREV(layer, h2_layer_list, entry); if (nlayer && RB_INSERT(hammer2_chain_tree, &nlayer->rbtree, chain) == NULL) { - atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); + layer = nlayer; goto done; } @@ -406,7 +406,7 @@ hammer2_chain_insert(hammer2_chain_core_t *above, hammer2_chain_layer_t *layer, TAILQ_INSERT_BEFORE(layer, nlayer, entry); RB_INSERT(hammer2_chain_tree, &nlayer->rbtree, chain); - atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); + layer = nlayer; goto done; } @@ -429,7 +429,7 @@ hammer2_chain_insert(hammer2_chain_core_t *above, hammer2_chain_layer_t *layer, chain->above = NULL; chain->inlayer = NULL; kprintf("insertion race against %p\n", xchain); - goto done; + goto failed; } /* @@ -444,6 +444,7 @@ hammer2_chain_insert(hammer2_chain_core_t *above, hammer2_chain_layer_t *layer, TAILQ_INSERT_HEAD(&above->layerq, layer, entry); RB_INSERT(hammer2_chain_tree, &layer->rbtree, chain); } +done: chain->inlayer = layer; ++above->chain_count; ++above->generation; @@ -453,7 +454,7 @@ hammer2_chain_insert(hammer2_chain_core_t *above, hammer2_chain_layer_t *layer, atomic_add_int(&above->live_count, 1); } atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); -done: +failed: if (flags & HAMMER2_CHAIN_INSERT_SPIN) spin_unlock(&above->cst.spin); } @@ -1814,6 +1815,16 @@ hammer2_chain_getparent(hammer2_chain_t **parentp, int how) bparent = TAILQ_FIRST(&above->ownerq); hammer2_chain_ref(bparent); + /* + * Be careful 
of order, oparent must be unlocked before nparent + * is locked below to avoid a deadlock. We might as well delay its + * unlocking until we conveniently no longer have the spinlock (instead + * of cycling the spinlock). + * + * Theoretically our ref on bparent should prevent elements of the + * following chain from going away and prevent above from going away, + * but we still need the spinlock to safely scan the list. + */ for (;;) { nparent = bparent; while (nparent->flags & HAMMER2_CHAIN_DUPLICATED) @@ -1821,29 +1832,23 @@ hammer2_chain_getparent(hammer2_chain_t **parentp, int how) hammer2_chain_ref(nparent); spin_unlock(&above->cst.spin); - /* - * Be careful of order - */ - hammer2_chain_unlock(oparent); + if (oparent) { + hammer2_chain_unlock(oparent); + oparent = NULL; + } hammer2_chain_lock(nparent, how | HAMMER2_RESOLVE_NOREF); hammer2_chain_drop(bparent); /* * We might have raced a delete-duplicate. */ - if (nparent->flags & HAMMER2_CHAIN_DUPLICATED) { - spin_lock(&above->cst.spin); - if (nparent->flags & HAMMER2_CHAIN_DUPLICATED) { - spin_unlock(&above->cst.spin); - hammer2_chain_ref(nparent); - hammer2_chain_unlock(nparent); - bparent = nparent; - spin_lock(&above->cst.spin); - continue; /* retry */ - } - spin_unlock(&above->cst.spin); - } - break; + if ((nparent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) + break; + bparent = nparent; + hammer2_chain_ref(bparent); + hammer2_chain_unlock(nparent); + spin_lock(&above->cst.spin); + /* retry */ } *parentp = nparent; -- 2.41.0 From 1ac92c8c857a15dc010924c5f066da404e568640 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Wed, 6 Nov 2013 20:51:23 +0100 Subject: [PATCH 11/16] strcasecmp.3: Fix wrong type name (site_t -> size_t). 
--- lib/libc/string/strcasecmp.3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libc/string/strcasecmp.3 b/lib/libc/string/strcasecmp.3 index 0bdd7003a9..eb65e51206 100644 --- a/lib/libc/string/strcasecmp.3 +++ b/lib/libc/string/strcasecmp.3 @@ -50,7 +50,7 @@ .Ft int .Fn strcasecmp_l "const char *s1" "const char *s2" "locale_t loc" .Ft int -.Fn strncasecmp_l "const char *s1" "const char *s2" "site_t len" "locale_t loc" +.Fn strncasecmp_l "const char *s1" "const char *s2" "size_t len" "locale_t loc" .Sh DESCRIPTION The .Fn strcasecmp -- 2.41.0 From 80e89abc73b34fc5e6520edc451a9f132bfac576 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 6 Nov 2013 17:55:06 -0800 Subject: [PATCH 12/16] kernel - Remove most buffer_map contention on 64-bit systems * Set BKVASIZE to MAXBSIZE (65536) on 64-bit systems. This has the effect of always reserving a maximal amount of KVM for each buffer cache buffer. * The change removes most buffer_map interactions once the system caches have stabilized. * The change removes the need to defragment the buffer cache. * Significant performance improvement for HAMMER1 and HAMMER2 which use larger buffers and were hitting degenerate fragmentation issues before this change. * But also results in lower buffer data density when buffering data for smaller files, so may have a slight detrimental effect on UFS and on the amount of time dirty data can be cached before being flushed to disk. * NOTE: The 64K limit is for normal buffers and is unrelated to the physical cluster buffer (pbuf) limit of 128KB (MAXPHYS). 
--- sys/cpu/i386/include/param.h | 11 +++++++++-- sys/cpu/x86_64/include/param.h | 4 ++-- sys/kern/vfs_bio.c | 9 +++++++-- sys/platform/pc64/x86_64/machdep.c | 12 +++++------- sys/sys/param.h | 13 +++++++++---- 5 files changed, 32 insertions(+), 17 deletions(-) diff --git a/sys/cpu/i386/include/param.h b/sys/cpu/i386/include/param.h index d6547ce984..5c0d1b40dd 100644 --- a/sys/cpu/i386/include/param.h +++ b/sys/cpu/i386/include/param.h @@ -124,8 +124,15 @@ #define MAXPHYS (128 * 1024) /* max raw I/O transfer size */ #define MAXDUMPPGS (MAXPHYS/PAGE_SIZE) -#define IOPAGES 2 /* pages of i/o permission bitmap */ -#define UPAGES 4 /* pages of u-area */ +#define IOPAGES 2 /* pages of i/o permission bitmap */ +#define UPAGES 4 /* pages of u-area */ + +/* + * 32-bit machines do not have enough KVA, improve buffer cache + * density at the cost of higher defragmentation and buffer_map + * handling overheads. + */ +#define BKVASIZE 16384 /* override 64K default */ /* * Ceiling on amount of swblock kva space, can be changed via diff --git a/sys/cpu/x86_64/include/param.h b/sys/cpu/x86_64/include/param.h index 15a489dc47..61a0b621c1 100644 --- a/sys/cpu/x86_64/include/param.h +++ b/sys/cpu/x86_64/include/param.h @@ -146,8 +146,8 @@ #define MAXPHYS (128 * 1024) /* max raw I/O transfer size */ #define MAXDUMPPGS (MAXPHYS/PAGE_SIZE) -#define IOPAGES 2 /* pages of i/o permission bitmap */ -#define UPAGES 4 /* pages of u-area */ +#define IOPAGES 2 /* pages of i/o permission bitmap */ +#define UPAGES 4 /* pages of u-area */ /* * Ceiling on amount of swblock kva space, can be changed via diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 3c2e1e21c1..25b1a74f32 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -2141,12 +2141,16 @@ restart: * If we are overcomitted then recover the buffer and its * KVM space. This occurs in rare situations when multiple * processes are blocked in getnewbuf() or allocbuf(). 
+ * + * (We don't have to recover the KVM space if + * BKVASIZE == MAXBSIZE) */ if (bufspace >= hibufspace) flushingbufs = 1; if (flushingbufs && bp->b_kvasize != 0) { bp->b_flags |= B_INVAL; - bfreekva(bp); + if (BKVASIZE != MAXBSIZE) + bfreekva(bp); brelse(bp); goto restart; } @@ -2164,7 +2168,8 @@ restart: */ if (bp->b_refs) { bp->b_flags |= B_INVAL; - bfreekva(bp); + if (BKVASIZE != MAXBSIZE) + bfreekva(bp); brelse(bp); goto restart; } diff --git a/sys/platform/pc64/x86_64/machdep.c b/sys/platform/pc64/x86_64/machdep.c index bd49c2af6d..04e51e6c13 100644 --- a/sys/platform/pc64/x86_64/machdep.c +++ b/sys/platform/pc64/x86_64/machdep.c @@ -390,20 +390,18 @@ again: * * nbuf is an int, make sure we don't overflow the field. * - * On 64-bit systems fragmentation can create serious performance - * loss due to the large number of buffers the system is likely - * going to maintain. The easiest solution is to create a KVA - * section that is twice as big as the nominal buffer cache size, - * hence the multiplication by 2 below. + * On 64-bit systems we always reserve maximal allocations for + * buffer cache buffers and there are no fragmentation issues, + * so the KVA segment does not have to be excessively oversized. 
*/ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); kmem_suballoc(&kernel_map, &clean_map, &clean_sva, &clean_eva, - ((vm_offset_t)nbuf * BKVASIZE * 2) + + ((vm_offset_t)(nbuf + 16) * BKVASIZE) + (nswbuf * MAXPHYS) + pager_map_size); kmem_suballoc(&clean_map, &buffer_map, &buffer_sva, &buffer_eva, - ((vm_offset_t)nbuf * BKVASIZE * 2)); + ((vm_offset_t)(nbuf + 16) * BKVASIZE)); buffer_map.system_map = 1; kmem_suballoc(&clean_map, &pager_map, &pager_sva, &pager_eva, ((vm_offset_t)nswbuf * MAXPHYS) + pager_map_size); diff --git a/sys/sys/param.h b/sys/sys/param.h index 148f171f4a..2743508ebc 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -207,11 +207,16 @@ * the KVM memory reserved for the buffer cache and will wind * up with too-few buffers. * - * The default is 16384, roughly 2x the block size used by a - * normal UFS filesystem. + * By default we now use maximally-sized reservations. But on + * 32-bit machines we reduce this to 16KB. Maximally-sized + * reservations greatly reduce defragmentation and buffer_map + * messing around and are more SMP-friendly. */ -#define MAXBSIZE 65536 /* must be power of 2 */ -#define BKVASIZE 16384 /* must be power of 2 */ +#define MAXBSIZE 65536 /* must be power of 2 */ +#ifndef BKVASIZE +#define BKVASIZE MAXBSIZE /* must be power of 2 */ +#endif + #define BKVAMASK (BKVASIZE-1) #define MAXFRAG 8 -- 2.41.0 From e9326fb362a3ec55db8e1ed4887fef347fad1113 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 6 Nov 2013 18:01:47 -0800 Subject: [PATCH 13/16] kernel - rename vm_map lock wmesg * Rename the vm_map lock wmesg from "thrd_sleep" to "vm_maplk" to reduce confusion when observing 'ps' output. 
--- sys/vm/vm_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index cee36b1c4b..bed456f6f5 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -550,7 +550,7 @@ vm_map_init(struct vm_map *map, vm_offset_t min, vm_offset_t max, pmap_t pmap) map->timestamp = 0; map->flags = 0; lwkt_token_init(&map->token, "vm_map"); - lockinit(&map->lock, "thrd_sleep", (hz + 9) / 10, 0); + lockinit(&map->lock, "vm_maplk", (hz + 9) / 10, 0); TUNABLE_INT("vm.cache_vmspaces", &vmspace_sysref_class.nom_cache); } -- 2.41.0 From 0238551e60e2dabcb1894e77d684ab0826501e58 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 6 Nov 2013 18:20:59 -0800 Subject: [PATCH 14/16] hammer2 - Stabilization * Fix bugs where cached values in the chain's shared core were being used and modified during a flush by dead chains. These values are only supposed to be used by live chains. Should fix a number of assertion panics. * Fix bug where the live_count calculation in a chain's shared core could be made based on a dead chain's block table. This calculation is also meant to only be made/used by live chains. --- sys/vfs/hammer2/hammer2_chain.c | 72 ++++++++++++++++++++++++--------- sys/vfs/hammer2/hammer2_flush.c | 2 - 2 files changed, 54 insertions(+), 20 deletions(-) diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c index 93025f100e..8b1cdceda9 100644 --- a/sys/vfs/hammer2/hammer2_chain.c +++ b/sys/vfs/hammer2/hammer2_chain.c @@ -1202,6 +1202,7 @@ hammer2_chain_unlock(hammer2_chain_t *chain) /* * This counts the number of live blockrefs in a block array and * also calculates the point at which all remaining blockrefs are empty. + * This routine can only be called on a live chain (DUPLICATED flag not set). * * NOTE: Flag is not set until after the count is complete, allowing * callers to test the flag without holding the spinlock. 
@@ -1218,6 +1219,8 @@ hammer2_chain_countbrefs(hammer2_chain_t *chain, { hammer2_chain_core_t *core = chain->core; + KKASSERT((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0); + spin_lock(&core->cst.spin); if ((core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0) { if (base) { @@ -3727,21 +3730,36 @@ hammer2_base_find(hammer2_chain_t *chain, hammer2_blockref_t *scan; hammer2_key_t scan_end; int i; + int limit; + + /* + * Require that live chains already have their cores counted + * so we can optimize operations. + */ + KKASSERT((chain->flags & HAMMER2_CHAIN_DUPLICATED) || + core->flags & HAMMER2_CORE_COUNTEDBREFS); /* * Degenerate case */ - KKASSERT(core->flags & HAMMER2_CORE_COUNTEDBREFS); if (count == 0 || base == NULL) return(count); /* - * Sequential optimization + * Sequential optimization using *cache_indexp. This is the most + * likely scenario. + * + * We can avoid trailing empty entries on live chains, otherwise + * we might have to check the whole block array. */ i = *cache_indexp; cpu_ccfence(); - if (i >= core->live_zero) - i = core->live_zero - 1; + if (chain->flags & HAMMER2_CHAIN_DUPLICATED) + limit = count; + else + limit = core->live_zero; + if (i >= limit) + i = limit - 1; if (i < 0) i = 0; KKASSERT(i < count); @@ -3770,14 +3788,14 @@ hammer2_base_find(hammer2_chain_t *chain, if (scan_end >= key_beg) break; } - if (i >= core->live_zero) + if (i >= limit) return (count); ++scan; ++i; } if (i != count) { *cache_indexp = i; - if (i >= core->live_zero) { + if (i >= limit) { i = count; } else { scan_end = scan->key + @@ -3885,12 +3903,12 @@ found: * need to be adjusted when we commit the media change. 
*/ void -hammer2_base_delete(hammer2_chain_t *chain, +hammer2_base_delete(hammer2_chain_t *parent, hammer2_blockref_t *base, int count, int *cache_indexp, hammer2_chain_t *child) { hammer2_blockref_t *elm = &child->bref; - hammer2_chain_core_t *core = chain->core; + hammer2_chain_core_t *core = parent->core; hammer2_key_t key_next; int i; @@ -3901,7 +3919,7 @@ hammer2_base_delete(hammer2_chain_t *chain, * re-flushed in some cases. */ key_next = 0; /* max range */ - i = hammer2_base_find(chain, base, count, cache_indexp, + i = hammer2_base_find(parent, base, count, cache_indexp, &key_next, elm->key, elm->key); if (i == count || base[i].type == 0 || base[i].key != elm->key || base[i].keybits != elm->keybits) { @@ -3910,10 +3928,16 @@ hammer2_base_delete(hammer2_chain_t *chain, return; } bzero(&base[i], sizeof(*base)); - if (core->live_zero == i + 1) { - while (--i >= 0 && base[i].type == 0) - ; - core->live_zero = i + 1; + + /* + * We can only optimize core->live_zero for live chains. + */ + if ((parent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) { + if (core->live_zero == i + 1) { + while (--i >= 0 && base[i].type == 0) + ; + core->live_zero = i + 1; + } } } @@ -3958,10 +3982,15 @@ hammer2_base_insert(hammer2_chain_t *parent, */ KKASSERT(i >= 0 && i <= count); + /* + * We can only optimize core->live_zero for live chains. + */ if (i == count && core->live_zero < count) { - i = core->live_zero++; - base[i] = *elm; - return; + if ((parent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) { + i = core->live_zero++; + base[i] = *elm; + return; + } } xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1; @@ -3992,8 +4021,15 @@ hammer2_base_insert(hammer2_chain_t *parent, bcopy(&base[i], &base[i+1], (k - i) * sizeof(hammer2_blockref_t)); base[i] = *elm; - if (core->live_zero <= k) - core->live_zero = k + 1; + + /* + * We can only update core->live_zero for live + * chains. 
+ */ + if ((parent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) { + if (core->live_zero <= k) + core->live_zero = k + 1; + } u = 2; goto validate; } diff --git a/sys/vfs/hammer2/hammer2_flush.c b/sys/vfs/hammer2/hammer2_flush.c index de2185a75e..c0e7d79dff 100644 --- a/sys/vfs/hammer2/hammer2_flush.c +++ b/sys/vfs/hammer2/hammer2_flush.c @@ -1398,8 +1398,6 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) ((parent->flags & HAMMER2_CHAIN_DESTROYED) || parent->bref.type != HAMMER2_BREF_TYPE_INODE)) { base = NULL; - } else if ((parent->core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0) { - hammer2_chain_countbrefs(parent, base, count); } /* -- 2.41.0 From c2f95d8a2185be01498f8b94a1cd429fbd520d45 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 7 Nov 2013 11:21:12 -0800 Subject: [PATCH 15/16] kernel - Fix memory leak, clean up kernel slab fragmentation * The kernel slab allocator collects off-cpu kfree()s in z_RChunks. This linked list is freed up by the owning cpu. However, zones which are not at the head of the list can wind up with non-NULL z_RChunks which never get freed. * Add a 10-second callout on every cpu to clean-up these stale zones, giving the kernel a chance to free the related zones. * This was probably the cause for other kernel memory leaks reported in the past. 
Found-by: sephe --- sys/kern/kern_slaballoc.c | 145 ++++++++++++++++++++++++-------------- sys/kern/kern_timeout.c | 22 ++++++ sys/sys/malloc.h | 1 + 3 files changed, 116 insertions(+), 52 deletions(-) diff --git a/sys/kern/kern_slaballoc.c b/sys/kern/kern_slaballoc.c index 95f42d13d0..eeb9bd056e 100644 --- a/sys/kern/kern_slaballoc.c +++ b/sys/kern/kern_slaballoc.c @@ -477,6 +477,61 @@ zoneindex(unsigned long *bytes, unsigned long *align) return(0); } +static __inline +void +clean_zone_rchunks(SLZone *z) +{ + SLChunk *bchunk; + + while ((bchunk = z->z_RChunks) != NULL) { + cpu_ccfence(); + if (atomic_cmpset_ptr(&z->z_RChunks, bchunk, NULL)) { + *z->z_LChunksp = bchunk; + while (bchunk) { + chunk_mark_free(z, bchunk); + z->z_LChunksp = &bchunk->c_Next; + bchunk = bchunk->c_Next; + ++z->z_NFree; + } + break; + } + /* retry */ + } +} + +/* + * If the zone becomes totally free, and there are other zones we + * can allocate from, move this zone to the FreeZones list. Since + * this code can be called from an IPI callback, do *NOT* try to mess + * with kernel_map here. Hysteresis will be performed at malloc() time. + */ +static __inline +SLZone * +check_zone_free(SLGlobalData *slgd, SLZone *z) +{ + if (z->z_NFree == z->z_NMax && + (z->z_Next || slgd->ZoneAry[z->z_ZoneIndex] != z) && + z->z_RCount == 0 + ) { + SLZone **pz; + int *kup; + + for (pz = &slgd->ZoneAry[z->z_ZoneIndex]; z != *pz; pz = &(*pz)->z_Next) + ; + *pz = z->z_Next; + z->z_Magic = -1; + z->z_Next = slgd->FreeZones; + slgd->FreeZones = z; + ++slgd->NFreeZones; + kup = btokup(z); + *kup = 0; + z = *pz; + } else { + z = z->z_Next; + } + return z; +} + #ifdef SLAB_DEBUG /* * Used to debug memory corruption issues. 
Record up to (typically 32) @@ -544,7 +599,6 @@ kmalloc(unsigned long size, struct malloc_type *type, int flags) { SLZone *z; SLChunk *chunk; - SLChunk *bchunk; SLGlobalData *slgd; struct globaldata *gd; unsigned long align; @@ -713,19 +767,8 @@ kmalloc(unsigned long size, struct malloc_type *type, int flags) if (z->z_RChunks == NULL) atomic_swap_int(&z->z_RSignal, 1); - while ((bchunk = z->z_RChunks) != NULL) { - cpu_ccfence(); - if (atomic_cmpset_ptr(&z->z_RChunks, bchunk, NULL)) { - *z->z_LChunksp = bchunk; - while (bchunk) { - chunk_mark_free(z, bchunk); - z->z_LChunksp = &bchunk->c_Next; - bchunk = bchunk->c_Next; - ++z->z_NFree; - } - break; - } - } + clean_zone_rchunks(z); + /* * Remove from the zone list if no free chunks remain. * Clear RSignal @@ -1022,7 +1065,6 @@ void kfree_remote(void *ptr) { SLGlobalData *slgd; - SLChunk *bchunk; SLZone *z; int nfree; int *kup; @@ -1053,19 +1095,7 @@ kfree_remote(void *ptr) * cache mastership of the related data (not that it helps since * we are using c_Next). */ - while ((bchunk = z->z_RChunks) != NULL) { - cpu_ccfence(); - if (atomic_cmpset_ptr(&z->z_RChunks, bchunk, NULL)) { - *z->z_LChunksp = bchunk; - while (bchunk) { - chunk_mark_free(z, bchunk); - z->z_LChunksp = &bchunk->c_Next; - bchunk = bchunk->c_Next; - ++z->z_NFree; - } - break; - } - } + clean_zone_rchunks(z); if (z->z_NFree && nfree == 0) { z->z_Next = slgd->ZoneAry[z->z_ZoneIndex]; slgd->ZoneAry[z->z_ZoneIndex] = z; @@ -1101,7 +1131,7 @@ kfree_remote(void *ptr) kup = btokup(z); *kup = 0; } - logmemory(free_rem_end, z, bchunk, 0L, 0); + logmemory(free_rem_end, z, NULL, 0L, 0); } /* @@ -1263,7 +1293,8 @@ kfree(void *ptr, struct malloc_type *type) * We can use a passive IPI to reduce overhead even further. 
*/ if (bchunk == NULL && rsignal) { - logmemory(free_request, ptr, type, (unsigned long)z->z_ChunkSize, 0); + logmemory(free_request, ptr, type, + (unsigned long)z->z_ChunkSize, 0); lwkt_send_ipiq_passive(z->z_CpuGd, kfree_remote, z); /* z can get ripped out from under us from this point on */ } else if (rsignal) { @@ -1326,30 +1357,40 @@ kfree(void *ptr, struct malloc_type *type) --type->ks_inuse[z->z_Cpu]; type->ks_memuse[z->z_Cpu] -= z->z_ChunkSize; - /* - * If the zone becomes totally free, and there are other zones we - * can allocate from, move this zone to the FreeZones list. Since - * this code can be called from an IPI callback, do *NOT* try to mess - * with kernel_map here. Hysteresis will be performed at malloc() time. - */ - if (z->z_NFree == z->z_NMax && - (z->z_Next || slgd->ZoneAry[z->z_ZoneIndex] != z) && - z->z_RCount == 0 - ) { - SLZone **pz; - int *kup; + check_zone_free(slgd, z); + logmemory_quick(free_end); + crit_exit(); +} - for (pz = &slgd->ZoneAry[z->z_ZoneIndex]; z != *pz; pz = &(*pz)->z_Next) - ; - *pz = z->z_Next; - z->z_Magic = -1; - z->z_Next = slgd->FreeZones; - slgd->FreeZones = z; - ++slgd->NFreeZones; - kup = btokup(z); - *kup = 0; +/* + * Cleanup slabs which are hanging around due to RChunks. Called once every + * 10 seconds on all cpus. + */ +void +slab_cleanup(void) +{ + SLGlobalData *slgd = &mycpu->gd_slab; + SLZone *z; + int i; + + crit_enter(); + for (i = 0; i < NZONES; ++i) { + if ((z = slgd->ZoneAry[i]) == NULL) + continue; + z = z->z_Next; + + /* + * Scan zones starting with the second zone in each list. + */ + while (z) { + /* + * Shift all RChunks to the end of the LChunks list. This is + * an O(1) operation. 
+ */ + clean_zone_rchunks(z); + z = check_zone_free(slgd, z); + } } - logmemory_quick(free_end); crit_exit(); } diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index 6ad002d5cd..00de48d91b 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -132,6 +132,7 @@ static int callwheelmask; static struct softclock_pcpu softclock_pcpu_ary[MAXCPU]; static void softclock_handler(void *arg); +static void slotimer_callback(void *arg); static void swi_softclock_setup(void *arg) @@ -245,10 +246,18 @@ softclock_handler(void *arg) softclock_pcpu_t sc; struct callout *c; struct callout_tailq *bucket; + struct callout slotimer; void (*c_func)(void *); void *c_arg; int mpsafe = 1; + /* + * Setup pcpu slow clocks which we want to run from the callout + * thread. + */ + callout_init(&slotimer); + callout_reset(&slotimer, hz * 10, slotimer_callback, &slotimer); + /* * Run the callout thread at the same priority as other kernel * threads so it can be round-robined. @@ -310,6 +319,19 @@ loop: /* NOT REACHED */ } +/* + * A very slow system cleanup timer (10 second interval), + * per-cpu. + */ +void +slotimer_callback(void *arg) +{ + struct callout *c = arg; + + slab_cleanup(); + callout_reset(c, hz * 10, slotimer_callback, c); +} + /* * New interface; clients allocate their own callout structures. * diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h index 080e35a3bc..270c06b786 100644 --- a/sys/sys/malloc.h +++ b/sys/sys/malloc.h @@ -209,6 +209,7 @@ void *kmalloc_cachealign (unsigned long size, struct malloc_type *type, int flags); void kfree (void *addr, struct malloc_type *type); long kmalloc_limit (struct malloc_type *type); +void slab_cleanup(void); #endif /* _KERNEL */ -- 2.41.0 From 37ab3e78f140de9cdfdc88c255015e0d72b8bcc7 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 7 Nov 2013 11:51:34 -0800 Subject: [PATCH 16/16] debug - Sync debug utilities, add a few more * Sync debug utilities to recent changes and add a few more. 
* Enhance zallocinfo --- test/debug/checkhammer.c | 169 +++++++++++++++++++++++ test/debug/crc32.c | 22 +++ test/debug/{zallocinfo.c => ksyscalls.c} | 88 +++--------- test/debug/{zallocinfo.c => slabinfo.c} | 112 ++++++++------- test/debug/{zallocinfo.c => vmobjinfo.c} | 106 +++++--------- test/debug/vmpageinfo.c | 4 + test/debug/zallocinfo.c | 26 ++++ 7 files changed, 332 insertions(+), 195 deletions(-) create mode 100644 test/debug/checkhammer.c create mode 100644 test/debug/crc32.c copy test/debug/{zallocinfo.c => ksyscalls.c} (60%) copy test/debug/{zallocinfo.c => slabinfo.c} (65%) copy test/debug/{zallocinfo.c => vmobjinfo.c} (61%) diff --git a/test/debug/checkhammer.c b/test/debug/checkhammer.c new file mode 100644 index 0000000000..a2a595cafa --- /dev/null +++ b/test/debug/checkhammer.c @@ -0,0 +1,169 @@ +/* + * checkhammer.c + * + * checkhammer blockmapdump btreedump + */ + +#include +#include +#include +#include +#include +#include + +struct rbmap_tree; +struct rbmap; + +static void parseBlockMap(FILE *fp); +static void parseBTree(FILE *fp); +static void dumpResults(void); +static int rbmap_cmp(struct rbmap *, struct rbmap *); + +typedef u_int64_t hammer_off_t; +typedef struct rbmap *rbmap_t; + +RB_HEAD(rbmap_tree, rbmap); +RB_PROTOTYPE2(rbmap_tree, rbmap, rbentry, rbmap_cmp, hammer_off_t); + +struct rbmap { + RB_ENTRY(rbmap) rbentry; + hammer_off_t base; + long app; + long free; + long bytes; + int zone; +}; + +RB_GENERATE2(rbmap_tree, rbmap, rbentry, rbmap_cmp, hammer_off_t, base); + +struct rbmap_tree rbroot; + +static +int +rbmap_cmp(struct rbmap *rb1, struct rbmap *rb2) +{ + if (rb1->base < rb2->base) + return(-1); + if (rb1->base > rb2->base) + return(1); + return(0); +} + +int +main(int ac, char **av) +{ + FILE *fp; + + if (ac != 3) { + fprintf(stderr, "checkhammer blockmapdump btreedump\n"); + exit(1); + } + if ((fp = fopen(av[1], "r")) == NULL) { + fprintf(stderr, "Unable to open %s\n", av[1]); + exit(1); + } + + RB_INIT(&rbroot); + 
parseBlockMap(fp); + fclose(fp); + if ((fp = fopen(av[2], "r")) == NULL) { + fprintf(stderr, "Unable to open %s\n", av[2]); + exit(1); + } + parseBTree(fp); + fclose(fp); + + dumpResults(); + return(0); +} + +static void +parseBlockMap(FILE *fp) +{ + char buf[1024]; + rbmap_t map; + int zone; + long long base; + long long app; + long long free; + + while (fgets(buf, sizeof(buf), fp) != NULL) { + if (sscanf(buf, " 4%llx zone=%d app=%lld free=%lld", + &base, &zone, &app, &free) != 4) + continue; + if (RB_LOOKUP(rbmap_tree, &rbroot, (hammer_off_t)base)) + continue; + map = malloc(sizeof(*map)); + map->base = (hammer_off_t)base; + map->app = (long)app; + map->free = (long)free; + map->zone = zone; + map->bytes = 0; + RB_INSERT(rbmap_tree, &rbroot, map); + } +} + +static void +parseBTree(FILE *fp) +{ + char buf[1024]; + rbmap_t map; + long long base; + long long bytes; + + while (fgets(buf, sizeof(buf), fp) != NULL) { + if (sscanf(buf, " NODE 8%llx", &base) == 1) { + base &= 0x0FFFFFFFFF800000LLU; + map = RB_LOOKUP(rbmap_tree, &rbroot, base); + if (map == NULL) { + printf("(not in blockmap): %s", buf); + continue; + } + map->bytes += 4096; + } + if (sscanf(buf, " dataoff=%llx/%lld", + &base, &bytes) == 2) { + base &= 0x0FFFFFFFFF800000LLU; + map = RB_LOOKUP(rbmap_tree, &rbroot, base); + if (map == NULL) { + printf("(not in blockmap): %s", buf); + continue; + } + map->bytes += (bytes + 15) & ~15; + } + } +} + +static void +dumpResults(void) +{ + rbmap_t map; + hammer_off_t bfree; + + printf("mismatches: (blockmap, actual)\n"); + RB_FOREACH(map, rbmap_tree, &rbroot) { + bfree = 8192 * 1024 - (int64_t)map->bytes; + + /* + * Ignore matches + */ + if (map->free == bfree) + continue; + + /* + * If the block is completely allocated but our calculations + * show nobody is referencing it it is probably an undo, + * blockmap, or unavailable reserved area. 
+ */ + if (map->free == 0 && bfree == 8192 * 1024) { + if (map->zone == 3 || map->zone == 4 || + map->zone == 15) + continue; + } + + printf(" bmap %016jx %jd %jd\n", + map->base, + (intmax_t)(int64_t)map->free, + (intmax_t)(int64_t)bfree); + } +} diff --git a/test/debug/crc32.c b/test/debug/crc32.c new file mode 100644 index 0000000000..cf7b275bbd --- /dev/null +++ b/test/debug/crc32.c @@ -0,0 +1,22 @@ + +/* + * cc crc32.c /usr/src/sys/libkern/crc32.c -o /usr/local/bin/crc32 + */ + +#include +#include +#include +#include + +int +main(int ac, char **av) +{ + char buf[256]; + int n; + u_int32_t crc = crc32(NULL, 0); + + while ((n = read(0, buf, sizeof(buf))) > 0) + crc = crc32_ext(buf, n, crc); + printf("crc %08x\n", crc); + return(0); +} diff --git a/test/debug/zallocinfo.c b/test/debug/ksyscalls.c similarity index 60% copy from test/debug/zallocinfo.c copy to test/debug/ksyscalls.c index f17f8bf72d..11250d1120 100644 --- a/test/debug/zallocinfo.c +++ b/test/debug/ksyscalls.c @@ -1,13 +1,11 @@ /* - * ZALLOCINFO.C + * KSYSCALLS.C * - * cc -I/usr/src/sys zallocinfo.c -o /usr/local/bin/zallocinfo -lkvm + * cc -I/usr/src/sys ksyscalls.c -o /usr/local/bin/ksyscalls -lkvm * - * zallocinfo + * Dump syscall debugging info * - * Print the slab structure and chains for all cpus. - * - * Copyright (c) 2010 The DragonFly Project. All rights reserved. + * Copyright (c) 2011 The DragonFly Project. All rights reserved. 
* * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -44,38 +42,33 @@ #include #include #include -#include #include -#include -#include +#include +#include +#include #include #include #include #include #include +#include #include #include #include #include #include -#include #include #include #include #include struct nlist Nl[] = { - { "_CPU_prvspace" }, - { "_ncpus" }, + { "_SysCallsWorstCase" }, { NULL } }; -int debugopt; -int verboseopt; - -static void dumpslab(kvm_t *kd, int cpu, struct SLGlobalData *slab); static void kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes); int @@ -83,21 +76,13 @@ main(int ac, char **av) { const char *corefile = NULL; const char *sysfile = NULL; - struct SLGlobalData slab; kvm_t *kd; - int offset; - int ncpus; int ch; int i; + uint64_t syscallsworstcase[SYS_MAXSYSCALL]; - while ((ch = getopt(ac, av, "M:N:dv")) != -1) { + while ((ch = getopt(ac, av, "M:N:")) != -1) { switch(ch) { - case 'd': - ++debugopt; - break; - case 'v': - ++verboseopt; - break; case 'M': corefile = optarg; break; @@ -120,58 +105,19 @@ main(int ac, char **av) perror("kvm_nlist"); exit(1); } + kkread(kd, Nl[0].n_value, syscallsworstcase, sizeof(syscallsworstcase)); - kkread(kd, Nl[1].n_value, &ncpus, sizeof(ncpus)); - offset = offsetof(struct privatespace, mdglobaldata.mi.gd_slab); - for (i = 0; i < ncpus; ++i) { - kkread(kd, Nl[0].n_value + sizeof(struct privatespace) * i + offset, &slab, sizeof(slab)); - dumpslab(kd, i, &slab); - } - printf("Done\n"); - return(0); -} - -static void -dumpslab(kvm_t *kd, int cpu, struct SLGlobalData *slab) -{ - struct SLZone *zonep; - struct SLZone zone; - int i; - int first; - int64_t save; - int64_t extra = 0; - - printf("cpu %d NFreeZones=%d\n", cpu, slab->NFreeZones); - - for (i = 0; i < NZONES; ++i) { - if ((zonep = slab->ZoneAry[i]) == NULL) - continue; - printf(" zone %2d", i); - first = 1; - save = extra; - while (zonep) { - kkread(kd, (u_long)zonep, &zone, 
sizeof(zone)); - if (first) { - printf(" chunk=%-5d elms=%-4d free:", - zone.z_ChunkSize, zone.z_NMax); - } - if (first == 0) - printf(","); - printf(" %d", zone.z_NFree); - extra += zone.z_NFree * zone.z_ChunkSize; - zonep = zone.z_Next; - first = 0; - } - printf(" (%jdK free)\n", (intmax_t)(extra - save) / 1024); + for (i = 0; i < SYS_MAXSYSCALL; ++i) { + if (syscallsworstcase[i]) + printf("call %3d %6jduS\n", i, (intmax_t)syscallsworstcase[i]); } - printf(" TotalUnused %jdM\n", (intmax_t)extra / 1024 / 1024); } static void kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes) { if (kvm_read(kd, addr, buf, nbytes) != nbytes) { - perror("kvm_read"); - exit(1); + perror("kvm_read"); + exit(1); } } diff --git a/test/debug/zallocinfo.c b/test/debug/slabinfo.c similarity index 65% copy from test/debug/zallocinfo.c copy to test/debug/slabinfo.c index f17f8bf72d..47a9ca58ac 100644 --- a/test/debug/zallocinfo.c +++ b/test/debug/slabinfo.c @@ -1,13 +1,13 @@ /* - * ZALLOCINFO.C + * SLABINFO.C * - * cc -I/usr/src/sys zallocinfo.c -o /usr/local/bin/zallocinfo -lkvm + * cc -I/usr/src/sys slabinfo.c -o /usr/local/bin/slabinfo -lkvm * - * zallocinfo + * slabinfo * - * Print the slab structure and chains for all cpus. + * dump kernel slab allocator pcpu data and chains * - * Copyright (c) 2010 The DragonFly Project. All rights reserved. + * Copyright (c) 2012 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -40,14 +40,16 @@ * SUCH DAMAGE. 
*/ -#define _KERNEL_STRUCTURES_ +#define _KERNEL_STRUCTURES #include #include #include -#include #include +#include +#include #include #include +#include #include #include @@ -60,7 +62,6 @@ #include #include #include -#include #include #include #include @@ -75,20 +76,22 @@ struct nlist Nl[] = { int debugopt; int verboseopt; -static void dumpslab(kvm_t *kd, int cpu, struct SLGlobalData *slab); -static void kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes); +int slzonedump(kvm_t *kd, SLZone *kslz); +void kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes); int main(int ac, char **av) { const char *corefile = NULL; const char *sysfile = NULL; - struct SLGlobalData slab; kvm_t *kd; - int offset; - int ncpus; int ch; int i; + int j; + int ncpus; + int totalzones; + int totalfree; + struct globaldata gd; while ((ch = getopt(ac, av, "M:N:dv")) != -1) { switch(ch) { @@ -121,57 +124,60 @@ main(int ac, char **av) exit(1); } - kkread(kd, Nl[1].n_value, &ncpus, sizeof(ncpus)); - offset = offsetof(struct privatespace, mdglobaldata.mi.gd_slab); + kkread(kd, Nl[1].n_value, &ncpus, sizeof(int)); + totalzones = 0; + totalfree = 0; for (i = 0; i < ncpus; ++i) { - kkread(kd, Nl[0].n_value + sizeof(struct privatespace) * i + offset, &slab, sizeof(slab)); - dumpslab(kd, i, &slab); + kkread(kd, Nl[0].n_value + i * sizeof(struct privatespace), &gd, sizeof(gd)); + printf("CPU %02d (NFreeZones=%d) {\n", + i, gd.gd_slab.NFreeZones); + totalfree += gd.gd_slab.NFreeZones; + + for (j = 0; j < NZONES; ++j) { + printf(" Zone %02d {\n", j); + totalzones += slzonedump(kd, gd.gd_slab.ZoneAry[j]); + printf(" }\n"); + } + + printf(" FreeZone {\n"); + totalzones += slzonedump(kd, gd.gd_slab.FreeZones); + printf(" }\n"); + + printf(" FreeOVZon {\n"); + totalzones += slzonedump(kd, gd.gd_slab.FreeOvZones); + printf(" }\n"); + + printf("}\n"); } - printf("Done\n"); + printf("TotalZones %d x 131072 = %jd\n", + totalzones, (intmax_t)totalzones * 131072LL); + printf("TotalFree %d x 131072 = 
%jd\n", + totalfree, (intmax_t)totalfree * 131072LL); return(0); } -static void -dumpslab(kvm_t *kd, int cpu, struct SLGlobalData *slab) +int +slzonedump(kvm_t *kd, SLZone *kslz) { - struct SLZone *zonep; - struct SLZone zone; - int i; - int first; - int64_t save; - int64_t extra = 0; - - printf("cpu %d NFreeZones=%d\n", cpu, slab->NFreeZones); - - for (i = 0; i < NZONES; ++i) { - if ((zonep = slab->ZoneAry[i]) == NULL) - continue; - printf(" zone %2d", i); - first = 1; - save = extra; - while (zonep) { - kkread(kd, (u_long)zonep, &zone, sizeof(zone)); - if (first) { - printf(" chunk=%-5d elms=%-4d free:", - zone.z_ChunkSize, zone.z_NMax); - } - if (first == 0) - printf(","); - printf(" %d", zone.z_NFree); - extra += zone.z_NFree * zone.z_ChunkSize; - zonep = zone.z_Next; - first = 0; - } - printf(" (%jdK free)\n", (intmax_t)(extra - save) / 1024); + SLZone slz; + int count = 0; + + while (kslz) { + kkread(kd, (u_long)kslz, &slz, sizeof(slz)); + printf("\t{ magic=%08x cpu=%d chunking=%d NFree=%d/%d RCnt=%d}\n", + slz.z_Magic, slz.z_Cpu, slz.z_ChunkSize, + slz.z_NFree, slz.z_NMax, slz.z_RCount); + kslz = slz.z_Next; + ++count; } - printf(" TotalUnused %jdM\n", (intmax_t)extra / 1024 / 1024); + return(count); } -static void +void kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes) { if (kvm_read(kd, addr, buf, nbytes) != nbytes) { - perror("kvm_read"); - exit(1); + perror("kvm_read"); + exit(1); } } diff --git a/test/debug/zallocinfo.c b/test/debug/vmobjinfo.c similarity index 61% copy from test/debug/zallocinfo.c copy to test/debug/vmobjinfo.c index f17f8bf72d..796aa6dcd7 100644 --- a/test/debug/zallocinfo.c +++ b/test/debug/vmobjinfo.c @@ -1,11 +1,9 @@ /* - * ZALLOCINFO.C + * VMOBJINFO.C * - * cc -I/usr/src/sys zallocinfo.c -o /usr/local/bin/zallocinfo -lkvm + * cc -I/usr/src/sys vmobjinfo.c -o /usr/local/bin/vmobjinfo -lkvm * - * zallocinfo - * - * Print the slab structure and chains for all cpus. 
+ * Dump all vm_object's in the system * * Copyright (c) 2010 The DragonFly Project. All rights reserved. * @@ -40,64 +38,56 @@ * SUCH DAMAGE. */ -#define _KERNEL_STRUCTURES_ +#define _KERNEL_STRUCTURES #include #include #include -#include #include -#include -#include +#include +#include +#include +#include #include #include #include -#include #include #include #include +#include +#include + #include #include #include -#include #include #include #include #include +TAILQ_HEAD(object_q, vm_object); + struct nlist Nl[] = { - { "_CPU_prvspace" }, - { "_ncpus" }, + { "_vm_object_list" }, { NULL } }; -int debugopt; -int verboseopt; - -static void dumpslab(kvm_t *kd, int cpu, struct SLGlobalData *slab); +static void scan_vmobjs(kvm_t *kd, struct object_q *obj_list); static void kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes); int main(int ac, char **av) { - const char *corefile = NULL; - const char *sysfile = NULL; - struct SLGlobalData slab; + struct object_q obj_list; kvm_t *kd; - int offset; - int ncpus; - int ch; int i; + int ch; + const char *corefile = NULL; + const char *sysfile = NULL; - while ((ch = getopt(ac, av, "M:N:dv")) != -1) { + while ((ch = getopt(ac, av, "M:N:")) != -1) { switch(ch) { - case 'd': - ++debugopt; - break; - case 'v': - ++verboseopt; - break; case 'M': corefile = optarg; break; @@ -109,8 +99,6 @@ main(int ac, char **av) exit(1); } } - ac -= optind; - av += optind; if ((kd = kvm_open(sysfile, corefile, NULL, O_RDONLY, "kvm:")) == NULL) { perror("kvm_open"); @@ -120,58 +108,34 @@ main(int ac, char **av) perror("kvm_nlist"); exit(1); } - - kkread(kd, Nl[1].n_value, &ncpus, sizeof(ncpus)); - offset = offsetof(struct privatespace, mdglobaldata.mi.gd_slab); - for (i = 0; i < ncpus; ++i) { - kkread(kd, Nl[0].n_value + sizeof(struct privatespace) * i + offset, &slab, sizeof(slab)); - dumpslab(kd, i, &slab); - } - printf("Done\n"); + kkread(kd, Nl[0].n_value, &obj_list, sizeof(obj_list)); + scan_vmobjs(kd, &obj_list); return(0); } static 
void -dumpslab(kvm_t *kd, int cpu, struct SLGlobalData *slab) +scan_vmobjs(kvm_t *kd, struct object_q *obj_list) { - struct SLZone *zonep; - struct SLZone zone; - int i; - int first; - int64_t save; - int64_t extra = 0; + struct vm_object *op; + struct vm_object obj; - printf("cpu %d NFreeZones=%d\n", cpu, slab->NFreeZones); + op = TAILQ_FIRST(obj_list); + while (op) { + kkread(kd, (long)op, &obj, sizeof(obj)); - for (i = 0; i < NZONES; ++i) { - if ((zonep = slab->ZoneAry[i]) == NULL) - continue; - printf(" zone %2d", i); - first = 1; - save = extra; - while (zonep) { - kkread(kd, (u_long)zonep, &zone, sizeof(zone)); - if (first) { - printf(" chunk=%-5d elms=%-4d free:", - zone.z_ChunkSize, zone.z_NMax); - } - if (first == 0) - printf(","); - printf(" %d", zone.z_NFree); - extra += zone.z_NFree * zone.z_ChunkSize; - zonep = zone.z_Next; - first = 0; - } - printf(" (%jdK free)\n", (intmax_t)(extra - save) / 1024); + printf("%p type=%d size=%016jx handle=%p swblocks=%d\n", + op, obj.type, (intmax_t)obj.size, obj.handle, + obj.swblock_count); + + op = TAILQ_NEXT(&obj, object_list); } - printf(" TotalUnused %jdM\n", (intmax_t)extra / 1024 / 1024); } static void kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes) { if (kvm_read(kd, addr, buf, nbytes) != nbytes) { - perror("kvm_read"); - exit(1); + perror("kvm_read"); + exit(1); } } diff --git a/test/debug/vmpageinfo.c b/test/debug/vmpageinfo.c index 049931fc65..349ef172c6 100644 --- a/test/debug/vmpageinfo.c +++ b/test/debug/vmpageinfo.c @@ -246,11 +246,15 @@ main(int ac, char **av) printf(" RAM"); if (m.flags & PG_SWAPPED) printf(" SWAPPED"); +#if 0 if (m.flags & PG_SLAB) printf(" SLAB"); +#endif printf("\n"); +#if 0 if (m.flags & PG_SLAB) addsltrack(&m); +#endif } } if (debugopt || verboseopt) diff --git a/test/debug/zallocinfo.c b/test/debug/zallocinfo.c index f17f8bf72d..c36923008d 100644 --- a/test/debug/zallocinfo.c +++ b/test/debug/zallocinfo.c @@ -136,7 +136,10 @@ dumpslab(kvm_t *kd, int cpu, struct 
SLGlobalData *slab) { struct SLZone *zonep; struct SLZone zone; + SLChunk *chunkp; + SLChunk chunk; int i; + int rcount; int first; int64_t save; int64_t extra = 0; @@ -161,6 +164,29 @@ dumpslab(kvm_t *kd, int cpu, struct SLGlobalData *slab) extra += zone.z_NFree * zone.z_ChunkSize; zonep = zone.z_Next; first = 0; + + chunkp = zone.z_RChunks; + rcount = 0; + while (chunkp) { + kkread(kd, (u_long)chunkp, &chunk, sizeof(chunk)); + chunkp = chunk.c_Next; + ++rcount; + } + if (rcount) { + printf(" rchunks=%d", rcount); + extra += rcount * zone.z_ChunkSize; + } + chunkp = zone.z_LChunks; + rcount = 0; + while (chunkp) { + kkread(kd, (u_long)chunkp, &chunk, sizeof(chunk)); + chunkp = chunk.c_Next; + ++rcount; + } + if (rcount) { + printf(" lchunks=%d", rcount); + extra += rcount * zone.z_ChunkSize; + } } printf(" (%jdK free)\n", (intmax_t)(extra - save) / 1024); } -- 2.41.0