From 618537cfaa49437046c077c8f6699125520cbf5b Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 18 Oct 2017 20:04:43 -0700 Subject: [PATCH] kernel - Make certain sysctls unlocked * Automatically flag all SYSCTL_[U]INT, [U]LONG, and [U]QUAD definitions CTLFLAG_NOLOCK. These do not have to be locked. Will improve program startup performance a tad. * Flag a ton of other sysctls used in program startup and also 'ps' CTLFLAG_NOLOCK. * For kern.hostname, interlock changes using XLOCK and allow the sysctl to run NOLOCK, avoiding unnecessary cache line bouncing. --- sys/kern/kern_mib.c | 31 +++++++++++++++++------- sys/kern/kern_nrandom.c | 2 +- sys/kern/kern_proc.c | 48 +++++++++++++++++++++++++------------- sys/kern/kern_sysctl.c | 8 +++++++ sys/sys/sysctl.h | 46 ++++++++++++++++++++++-------------- sys/vfs/devfs/devfs_core.c | 10 ++++---- sys/vm/vm_meter.c | 5 ++-- 7 files changed, 102 insertions(+), 48 deletions(-) diff --git a/sys/kern/kern_mib.c b/sys/kern/kern_mib.c index 28a773fbcc..a2c41d1149 100644 --- a/sys/kern/kern_mib.c +++ b/sys/kern/kern_mib.c @@ -76,22 +76,22 @@ SYSCTL_NODE(, OID_AUTO, compat, CTLFLAG_RW, 0, SYSCTL_NODE(, OID_AUTO, security, CTLFLAG_RW, 0, "Security"); -SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD, +SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD | CTLFLAG_NOLOCK, kern_ident, 0, "Kernel identifier"); -SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD, +SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD | CTLFLAG_NOLOCK, osrelease, 0, "Operating system type"); SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD, 0, BSD, "Operating system revision"); -SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD, +SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD | CTLFLAG_NOLOCK, version, 0, "Kernel version"); -SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD, +SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD | CTLFLAG_NOLOCK, ostype, 0, "Operating system type"); -SYSCTL_INT(_kern, KERN_OSRELDATE, 
osreldate, CTLFLAG_RD, +SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD, &osreldate, 0, "Operating system release date"); SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RD, @@ -141,15 +141,22 @@ SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD, 0, PAGE_SIZE, "System memory page size"); static char platform[] = MACHINE_PLATFORM; -SYSCTL_STRING(_hw, HW_MACHINE_PLATFORM, platform, CTLFLAG_RD, +SYSCTL_STRING(_hw, HW_MACHINE_PLATFORM, platform, CTLFLAG_RD | CTLFLAG_NOLOCK, platform, 0, "Platform architecture"); static char machine_arch[] = MACHINE_ARCH; -SYSCTL_STRING(_hw, HW_MACHINE_ARCH, machine_arch, CTLFLAG_RD, +SYSCTL_STRING(_hw, HW_MACHINE_ARCH, machine_arch, CTLFLAG_RD | CTLFLAG_NOLOCK, machine_arch, 0, "Cpu architecture"); char hostname[MAXHOSTNAMELEN]; +/* + * Hostname sysctl handler. We use CTLFLAG_NOLOCK to avoid acquiring + * the per-oid lock. The per-cpu SLOCK is still acquired, so to interlock + * against setting the hostname we relock with XLOCK. The result is + * that the critical path (just reading the hostname) gets one less lock + * and will have improved performance. + */ static int sysctl_hostname(SYSCTL_HANDLER_ARGS) { @@ -157,6 +164,10 @@ sysctl_hostname(SYSCTL_HANDLER_ARGS) struct proc *p = td ? 
td->td_proc : NULL; int error; + if (req->newptr) { + SYSCTL_SUNLOCK(); + SYSCTL_XLOCK(); + } if (p && p->p_ucred->cr_prison) { if (!jail_set_hostname_allowed && req->newptr) return(EPERM); @@ -167,11 +178,15 @@ sysctl_hostname(SYSCTL_HANDLER_ARGS) error = sysctl_handle_string(oidp, hostname, sizeof hostname, req); } + if (req->newptr) { + SYSCTL_XUNLOCK(); + SYSCTL_SLOCK(); + } return (error); } SYSCTL_PROC(_kern, KERN_HOSTNAME, hostname, - CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_PRISON, + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_NOLOCK, 0, 0, sysctl_hostname, "A", "Hostname"); int securelevel = -1; diff --git a/sys/kern/kern_nrandom.c b/sys/kern/kern_nrandom.c index 82a83d54b9..bf98c2ece4 100644 --- a/sys/kern/kern_nrandom.c +++ b/sys/kern/kern_nrandom.c @@ -448,7 +448,7 @@ SYSCTL_PROC(_kern, OID_AUTO, rand_mode, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, /* - * Called from early boot + * Called from early boot (pre-SMP) */ void rand_initialize(void) diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 89be94962b..c43d2342c9 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -1928,51 +1928,67 @@ sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS) SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table"); -SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT, +SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, + CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_NOLOCK, 0, 0, sysctl_kern_proc, "S,proc", "Return entire process table"); -SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD, +SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc, "Process table"); -SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD, +SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc, "Process table"); -SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD, +SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc, "Process table"); 
-SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD, +SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc, "Process table"); -SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD, +SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc, "Process table"); -SYSCTL_NODE(_kern_proc, (KERN_PROC_ALL | KERN_PROC_FLAG_LWP), all_lwp, CTLFLAG_RD, +SYSCTL_NODE(_kern_proc, (KERN_PROC_ALL | KERN_PROC_FLAG_LWP), all_lwp, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc, "Process table"); -SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_FLAG_LWP), pgrp_lwp, CTLFLAG_RD, +SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_FLAG_LWP), pgrp_lwp, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc, "Process table"); -SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_FLAG_LWP), tty_lwp, CTLFLAG_RD, +SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_FLAG_LWP), tty_lwp, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc, "Process table"); -SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_FLAG_LWP), uid_lwp, CTLFLAG_RD, +SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_FLAG_LWP), uid_lwp, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc, "Process table"); -SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_FLAG_LWP), ruid_lwp, CTLFLAG_RD, +SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_FLAG_LWP), ruid_lwp, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc, "Process table"); -SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_FLAG_LWP), pid_lwp, CTLFLAG_RD, +SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_FLAG_LWP), pid_lwp, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc, "Process table"); -SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, CTLFLAG_RW | CTLFLAG_ANYBODY, +SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, + CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_NOLOCK, sysctl_kern_proc_args, "Process argument list"); -SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd, CTLFLAG_RD | CTLFLAG_ANYBODY, 
+SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd, + CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_NOLOCK, sysctl_kern_proc_cwd, "Process argument list"); -static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD, +static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, + CTLFLAG_RD | CTLFLAG_NOLOCK, sysctl_kern_proc_pathname, "Process executable path"); -SYSCTL_PROC(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD|CTLTYPE_STRUCT, +SYSCTL_PROC(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, + CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_NOLOCK, 0, 0, sysctl_kern_proc_sigtramp, "S,sigtramp", "Return sigtramp address range"); diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 9ea9076259..1d85018afb 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1216,7 +1216,15 @@ sysctl_root(SYSCTL_HANDLER_ARGS) lktype = LK_SHARED; if (oid->oid_kind & CTLFLAG_EXLOCK) lktype = LK_EXCLUSIVE; +#if 1 lockmgr(&oid->oid_lock, lktype); +#else + /* DEBUGGING */ + if (lockmgr(&oid->oid_lock, lktype | LK_SLEEPFAIL)) { + kprintf("%s\n", oid->oid_name); + lockmgr(&oid->oid_lock, lktype); + } +#endif } if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h index 4768787b64..edfaa31109 100644 --- a/sys/sys/sysctl.h +++ b/sys/sys/sysctl.h @@ -253,57 +253,69 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); /* Oid for an int. If ptr is NULL, val is returned. */ #define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|(access), \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_INT|CTLFLAG_NOLOCK|(access), \ ptr, val, sysctl_handle_int, "I", descr) #define SYSCTL_ADD_INT(ctx, parent, nbr, name, access, ptr, val, descr) \ - sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_INT|(access), \ + sysctl_add_oid(ctx, parent, nbr, name, \ + CTLTYPE_INT|CTLFLAG_NOLOCK|(access), \ ptr, val, sysctl_handle_int, "I", descr) /* Oid for a quad. If ptr is NULL, val is returned. 
*/ #define SYSCTL_QUAD(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_QUAD|(access), \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_QUAD|CTLFLAG_NOLOCK|(access), \ ptr, val, sysctl_handle_quad, "Q", descr) #define SYSCTL_ADD_QUAD(ctx, parent, nbr, name, access, ptr, val, descr) \ - sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_QUAD|(access), \ - ptr, val, sysctl_handle_quad, "Q", descr) + sysctl_add_oid(ctx, parent, nbr, name, \ + CTLTYPE_QUAD|CTLFLAG_NOLOCK|(access), \ + ptr, val, sysctl_handle_quad, "Q", descr) /* Oid for an unsigned quad. If ptr is NULL, val is returned. */ #define SYSCTL_UQUAD(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_UQUAD|(access), \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_UQUAD|CTLFLAG_NOLOCK|(access), \ ptr, val, sysctl_handle_quad, "QU", descr) #define SYSCTL_ADD_UQUAD(ctx, parent, nbr, name, access, ptr, val, descr) \ - sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_UQUAD|(access), \ - ptr, val, sysctl_handle_quad, "QU", descr) + sysctl_add_oid(ctx, parent, nbr, name, \ + CTLTYPE_UQUAD|CTLFLAG_NOLOCK|(access), \ + ptr, val, sysctl_handle_quad, "QU", descr) /* Oid for an unsigned int. If ptr is NULL, val is returned. */ #define SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_UINT|(access), \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_UINT|CTLFLAG_NOLOCK|(access), \ ptr, val, sysctl_handle_int, "IU", descr) #define SYSCTL_ADD_UINT(ctx, parent, nbr, name, access, ptr, val, descr) \ - sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_UINT|(access), \ - ptr, val, sysctl_handle_int, "IU", descr) + sysctl_add_oid(ctx, parent, nbr, name, \ + CTLTYPE_UINT|CTLFLAG_NOLOCK|(access), \ + ptr, val, sysctl_handle_int, "IU", descr) /* Oid for a long. The pointer must be non NULL. 
*/ #define SYSCTL_LONG(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_LONG|(access), \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_LONG|CTLFLAG_NOLOCK|(access), \ ptr, val, sysctl_handle_long, "L", descr) #define SYSCTL_ADD_LONG(ctx, parent, nbr, name, access, ptr, descr) \ - sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_LONG|(access), \ - ptr, 0, sysctl_handle_long, "L", descr) + sysctl_add_oid(ctx, parent, nbr, name, \ + CTLTYPE_LONG|CTLFLAG_NOLOCK|(access), \ + ptr, 0, sysctl_handle_long, "L", descr) /* Oid for a long. The pointer must be non NULL. */ #define SYSCTL_ULONG(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_ULONG|(access), \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_ULONG|CTLFLAG_NOLOCK|(access), \ ptr, val, sysctl_handle_long, "LU", descr) #define SYSCTL_ADD_ULONG(ctx, parent, nbr, name, access, ptr, descr) \ - sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_ULONG|(access), \ - ptr, 0, sysctl_handle_long, "LU", descr) + sysctl_add_oid(ctx, parent, nbr, name, \ + CTLTYPE_ULONG|CTLFLAG_NOLOCK|(access), \ + ptr, 0, sysctl_handle_long, "LU", descr) /* Oid for an opaque object. Specified by a pointer and a length. 
*/ #define SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \ diff --git a/sys/vfs/devfs/devfs_core.c b/sys/vfs/devfs/devfs_core.c index eb77dcb44e..2e050afe4e 100644 --- a/sys/vfs/devfs/devfs_core.c +++ b/sys/vfs/devfs/devfs_core.c @@ -2669,11 +2669,11 @@ devfs_sysctl_devname_helper(SYSCTL_HANDLER_ARGS) cdev_t found; int error; - if ((error = SYSCTL_IN(req, &udev, sizeof(udev_t)))) return (error); - devfs_debug(DEVFS_DEBUG_DEBUG, "devfs sysctl, received udev: %d\n", udev); + devfs_debug(DEVFS_DEBUG_DEBUG, + "devfs sysctl, received udev: %d\n", udev); if (udev == NOUDEV) return(EINVAL); @@ -2685,8 +2685,10 @@ devfs_sysctl_devname_helper(SYSCTL_HANDLER_ARGS) } -SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY, - NULL, 0, devfs_sysctl_devname_helper, "", "helper for devname(3)"); +SYSCTL_PROC(_kern, OID_AUTO, devname, + CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_NOLOCK, + NULL, 0, devfs_sysctl_devname_helper, "", + "helper for devname(3)"); SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "devfs"); TUNABLE_INT("vfs.devfs.debug", &devfs_debug_enable); diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 8e07b91b4b..3f496b6847 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -91,8 +91,9 @@ SYSCTL_UINT(_vm, VM_V_PAGEOUT_FREE_MIN, v_pageout_free_min, SYSCTL_UINT(_vm, OID_AUTO, v_free_severe, CTLFLAG_RW, &vmstats.v_free_severe, 0, ""); -SYSCTL_STRUCT(_vm, VM_LOADAVG, loadavg, CTLFLAG_RD, - &averunnable, loadavg, "Machine loadaverage history"); +SYSCTL_STRUCT(_vm, VM_LOADAVG, loadavg, + CTLFLAG_RD | CTLFLAG_NOLOCK, + &averunnable, loadavg, "Machine loadaverage history"); static int do_vmtotal_callback(struct proc *p, void *data); -- 2.41.0