From 3583bbb419ec67c251daa38bc58fa0739ad1eeac Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 11 Nov 2011 22:20:12 -0800 Subject: [PATCH] kernel - Upgrade buffer space tracking variables from int to long * Several bufspace-related buffer cache parameters can now overflow a 32 bit integer on machines with large amounts (~64G+) of memory. Change these to long. bufspace, maxbufspace, maxbufmallocspace, lobufspace, hibufspace, lorunningspace, hirunningspace, dirtybufspace, dirtybufspacehw, runningbufspace, lodirtybufspace, hidirtybufspace. * Also requires an API change to libkcore/libkinfo which affects top. --- lib/libkcore/kcore.h | 2 +- lib/libkcore/kcore_vfs.c | 2 +- lib/libkinfo/kinfo.h | 2 +- lib/libkinfo/kinfo_vfs.c | 2 +- sys/kern/kern_iosched.c | 4 +- sys/kern/vfs_bio.c | 74 +++++++++++++++++----------------- sys/sys/buf.h | 2 +- sys/vfs/hammer/hammer.h | 12 +++--- sys/vfs/hammer/hammer_io.c | 22 +++++----- sys/vfs/hammer/hammer_vfsops.c | 16 ++++---- usr.bin/top/m_dragonfly.c | 2 +- 11 files changed, 70 insertions(+), 70 deletions(-) diff --git a/lib/libkcore/kcore.h b/lib/libkcore/kcore.h index 1181e1845a..151654aa38 100644 --- a/lib/libkcore/kcore.h +++ b/lib/libkcore/kcore.h @@ -93,7 +93,7 @@ int kcore_get_sched_profhz(struct kcore_data *, int *); int kcore_get_sched_stathz(struct kcore_data *, int *); int kcore_get_tty_tk_nin(struct kcore_data *, uint64_t *); int kcore_get_tty_tk_nout(struct kcore_data *, uint64_t *); -int kcore_get_vfs_bufspace(struct kcore_data *, int *); +int kcore_get_vfs_bufspace(struct kcore_data *, long *); __END_DECLS; #endif diff --git a/lib/libkcore/kcore_vfs.c b/lib/libkcore/kcore_vfs.c index e887fc420d..78cf045131 100644 --- a/lib/libkcore/kcore_vfs.c +++ b/lib/libkcore/kcore_vfs.c @@ -46,7 +46,7 @@ #include "kcore_private.h" int -kcore_get_vfs_bufspace(struct kcore_data *kc, int *bufspace) +kcore_get_vfs_bufspace(struct kcore_data *kc, long *bufspace) { static struct nlist nl[] = { { "_bufspace", 0, 0, 0, 0}, diff 
--git a/lib/libkinfo/kinfo.h b/lib/libkinfo/kinfo.h index dd04a5709f..fdb87e3c71 100644 --- a/lib/libkinfo/kinfo.h +++ b/lib/libkinfo/kinfo.h @@ -67,7 +67,7 @@ int kinfo_get_tty_tk_nin(uint64_t *); int kinfo_get_tty_tk_nout(uint64_t *); /* VFS */ -int kinfo_get_vfs_bufspace(int *); +int kinfo_get_vfs_bufspace(long *); /* Per-CPU accumulators */ PCPU_STATISTICS_PROT(cputime, struct kinfo_cputime); diff --git a/lib/libkinfo/kinfo_vfs.c b/lib/libkinfo/kinfo_vfs.c index df5857ca0b..03d84ac430 100644 --- a/lib/libkinfo/kinfo_vfs.c +++ b/lib/libkinfo/kinfo_vfs.c @@ -42,7 +42,7 @@ #include int -kinfo_get_vfs_bufspace(int *bufspace) +kinfo_get_vfs_bufspace(long *bufspace) { size_t len = sizeof(*bufspace); diff --git a/sys/kern/kern_iosched.c b/sys/kern/kern_iosched.c index 145e477ef8..f405a5785e 100644 --- a/sys/kern/kern_iosched.c +++ b/sys/kern/kern_iosched.c @@ -129,8 +129,8 @@ biosched_done(thread_t td) void bwillwrite(int bytes) { - int count; - int factor; + long count; + long factor; count = bd_heatup(); if (count > 0) { diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 20598f3118..35b6a80fcd 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -119,22 +119,22 @@ vm_page_t bogus_page; * These are all static, but make the ones we export globals so we do * not need to use compiler magic. 
*/ -int bufspace; /* locked by buffer_map */ -int maxbufspace; -static int bufmallocspace; /* atomic ops */ -int maxbufmallocspace, lobufspace, hibufspace; +long bufspace; /* locked by buffer_map */ +long maxbufspace; +static long bufmallocspace; /* atomic ops */ +long maxbufmallocspace, lobufspace, hibufspace; static int bufreusecnt, bufdefragcnt, buffreekvacnt; -static int lorunningspace; -static int hirunningspace; +static long lorunningspace; +static long hirunningspace; static int runningbufreq; /* locked by bufcspin */ -static int dirtybufspace; /* locked by bufcspin */ +static long dirtybufspace; /* locked by bufcspin */ static int dirtybufcount; /* locked by bufcspin */ -static int dirtybufspacehw; /* locked by bufcspin */ +static long dirtybufspacehw; /* locked by bufcspin */ static int dirtybufcounthw; /* locked by bufcspin */ -static int runningbufspace; /* locked by bufcspin */ +static long runningbufspace; /* locked by bufcspin */ static int runningbufcount; /* locked by bufcspin */ -int lodirtybufspace; -int hidirtybufspace; +long lodirtybufspace; +long hidirtybufspace; static int getnewbufcalls; static int getnewbufrestarts; static int recoverbufcalls; @@ -154,13 +154,13 @@ static u_int lowmempgfails; /* * Sysctls for operational control of the buffer cache. 
*/ -SYSCTL_INT(_vfs, OID_AUTO, lodirtybufspace, CTLFLAG_RW, &lodirtybufspace, 0, +SYSCTL_LONG(_vfs, OID_AUTO, lodirtybufspace, CTLFLAG_RW, &lodirtybufspace, 0, "Number of dirty buffers to flush before bufdaemon becomes inactive"); -SYSCTL_INT(_vfs, OID_AUTO, hidirtybufspace, CTLFLAG_RW, &hidirtybufspace, 0, +SYSCTL_LONG(_vfs, OID_AUTO, hidirtybufspace, CTLFLAG_RW, &hidirtybufspace, 0, "High watermark used to trigger explicit flushing of dirty buffers"); -SYSCTL_INT(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW, &lorunningspace, 0, +SYSCTL_LONG(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW, &lorunningspace, 0, "Minimum amount of buffer space required for active I/O"); -SYSCTL_INT(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0, +SYSCTL_LONG(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0, "Maximum amount of buffer space to usable for active I/O"); SYSCTL_UINT(_vfs, OID_AUTO, lowmempgallocs, CTLFLAG_RW, &lowmempgallocs, 0, "Page allocations done during periods of very low free memory"); @@ -173,29 +173,29 @@ SYSCTL_UINT(_vfs, OID_AUTO, vm_cycle_point, CTLFLAG_RW, &vm_cycle_point, 0, */ SYSCTL_INT(_vfs, OID_AUTO, nbuf, CTLFLAG_RD, &nbuf, 0, "Total number of buffers in buffer cache"); -SYSCTL_INT(_vfs, OID_AUTO, dirtybufspace, CTLFLAG_RD, &dirtybufspace, 0, +SYSCTL_LONG(_vfs, OID_AUTO, dirtybufspace, CTLFLAG_RD, &dirtybufspace, 0, "Pending bytes of dirty buffers (all)"); -SYSCTL_INT(_vfs, OID_AUTO, dirtybufspacehw, CTLFLAG_RD, &dirtybufspacehw, 0, +SYSCTL_LONG(_vfs, OID_AUTO, dirtybufspacehw, CTLFLAG_RD, &dirtybufspacehw, 0, "Pending bytes of dirty buffers (heavy weight)"); SYSCTL_INT(_vfs, OID_AUTO, dirtybufcount, CTLFLAG_RD, &dirtybufcount, 0, "Pending number of dirty buffers"); SYSCTL_INT(_vfs, OID_AUTO, dirtybufcounthw, CTLFLAG_RD, &dirtybufcounthw, 0, "Pending number of dirty buffers (heavy weight)"); -SYSCTL_INT(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0, +SYSCTL_LONG(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, 
&runningbufspace, 0, "I/O bytes currently in progress due to asynchronous writes"); SYSCTL_INT(_vfs, OID_AUTO, runningbufcount, CTLFLAG_RD, &runningbufcount, 0, "I/O buffers currently in progress due to asynchronous writes"); -SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0, +SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0, "Hard limit on maximum amount of memory usable for buffer space"); -SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0, +SYSCTL_LONG(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0, "Soft limit on maximum amount of memory usable for buffer space"); -SYSCTL_INT(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0, +SYSCTL_LONG(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0, "Minimum amount of memory to reserve for system buffer space"); -SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, +SYSCTL_LONG(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, "Amount of memory available for buffers"); -SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RD, &maxbufmallocspace, +SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RD, &maxbufmallocspace, 0, "Maximum amount of memory reserved for buffers using malloc"); -SYSCTL_INT(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, +SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, "Amount of memory left for buffers using malloc-scheme"); SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RD, &getnewbufcalls, 0, "New buffer header acquisition requests"); @@ -255,8 +255,8 @@ bufspacewakeup(void) static __inline void runningbufwakeup(struct buf *bp) { - int totalspace; - int limit; + long totalspace; + long limit; if ((totalspace = bp->b_runningbufspace) != 0) { spin_lock(&bufcspin); @@ -317,7 +317,7 @@ bufcountwakeup(void) void waitrunningbufspace(void) { - int limit = hirunningspace * 4 / 6; + long limit = hirunningspace * 4 / 6; if (runningbufspace > limit || runningbufreq) { 
spin_lock(&bufcspin); @@ -422,9 +422,9 @@ bd_speedup(void) int bd_heatup(void) { - int mid1; - int mid2; - int totalspace; + long mid1; + long mid2; + long totalspace; mid1 = lodirtybufspace + (hidirtybufspace - lodirtybufspace) / 2; @@ -645,7 +645,7 @@ bufinit(void) * this may result in KVM fragmentation which is not handled optimally * by the system. */ - maxbufspace = nbuf * BKVASIZE; + maxbufspace = (long)nbuf * BKVASIZE; hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10); lobufspace = hibufspace - MAXBSIZE; @@ -2514,7 +2514,7 @@ SYSINIT(bufdaemon_hw, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, static void buf_daemon(void) { - int limit; + long limit; /* * This process needs to be suspended prior to shutdown sync. @@ -2573,7 +2573,7 @@ buf_daemon(void) static void buf_daemon_hw(void) { - int limit; + long limit; /* * This process needs to be suspended prior to shutdown sync. @@ -3306,7 +3306,7 @@ allocbuf(struct buf *bp, int size) } else { kfree(bp->b_data, M_BIOBUF); if (bp->b_bufsize) { - atomic_subtract_int(&bufmallocspace, bp->b_bufsize); + atomic_subtract_long(&bufmallocspace, bp->b_bufsize); bufspacewakeup(); bp->b_bufsize = 0; } @@ -3334,7 +3334,7 @@ allocbuf(struct buf *bp, int size) bp->b_bufsize = mbsize; bp->b_bcount = size; bp->b_flags |= B_MALLOC; - atomic_add_int(&bufmallocspace, mbsize); + atomic_add_long(&bufmallocspace, mbsize); return 1; } origbuf = NULL; @@ -3349,8 +3349,8 @@ allocbuf(struct buf *bp, int size) origbufsize = bp->b_bufsize; bp->b_data = bp->b_kvabase; if (bp->b_bufsize) { - atomic_subtract_int(&bufmallocspace, - bp->b_bufsize); + atomic_subtract_long(&bufmallocspace, + bp->b_bufsize); bufspacewakeup(); bp->b_bufsize = 0; } diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 71877b32bd..07423254de 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -377,7 +377,7 @@ struct cluster_save { extern int nbuf; /* The number of buffer headers */ extern long maxswzone; /* Max KVA for swap structures */ extern long maxbcache; /* Max 
KVA for buffer cache */ -extern int hidirtybufspace; +extern long hidirtybufspace; extern int buf_maxio; /* nominal maximum I/O for buffer */ extern struct buf *buf; /* The buffer headers. */ extern char *buffers; /* The buffer contents. */ diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 1bad1b12da..c735a06f1b 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -911,8 +911,8 @@ struct hammer_mount { struct hammer_mod_rb_tree data_root; /* dirty data buffers */ struct hammer_mod_rb_tree meta_root; /* dirty meta bufs */ struct hammer_mod_rb_tree lose_root; /* loose buffers */ - int locked_dirty_space; /* meta/volu count */ - int io_running_space; /* io_token */ + long locked_dirty_space; /* meta/volu count */ + long io_running_space; /* io_token */ int unused01; int objid_cache_count; int dedup_cache_count; @@ -1038,13 +1038,13 @@ extern int64_t hammer_stats_inode_flushes; extern int64_t hammer_stats_commits; extern int64_t hammer_stats_undo; extern int64_t hammer_stats_redo; -extern int hammer_count_dirtybufspace; +extern long hammer_count_dirtybufspace; extern int hammer_count_refedbufs; extern int hammer_count_reservations; -extern int hammer_count_io_running_read; -extern int hammer_count_io_running_write; +extern long hammer_count_io_running_read; +extern long hammer_count_io_running_write; extern int hammer_count_io_locked; -extern int hammer_limit_dirtybufspace; +extern long hammer_limit_dirtybufspace; extern int hammer_limit_recs; extern int hammer_limit_inode_recs; extern int hammer_limit_reclaims; diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c index be5f0c3a37..896048531f 100644 --- a/sys/vfs/hammer/hammer_io.c +++ b/sys/vfs/hammer/hammer_io.c @@ -303,7 +303,7 @@ hammer_io_read(struct vnode *devvp, struct hammer_io *io, int limit) int error; if ((bp = io->bp) == NULL) { - atomic_add_int(&hammer_count_io_running_read, io->bytes); + atomic_add_long(&hammer_count_io_running_read, io->bytes); if 
(hammer_cluster_enable && limit > io->bytes) { error = cluster_read(devvp, io->offset + limit, io->offset, io->bytes, @@ -314,7 +314,7 @@ hammer_io_read(struct vnode *devvp, struct hammer_io *io, int limit) error = bread(devvp, io->offset, io->bytes, &io->bp); } hammer_stats_disk_read += io->bytes; - atomic_add_int(&hammer_count_io_running_read, -io->bytes); + atomic_add_long(&hammer_count_io_running_read, -io->bytes); /* * The code generally assumes b_ops/b_dep has been set-up, @@ -737,8 +737,8 @@ hammer_io_flush(struct hammer_io *io, int reclaim) * update io_running_space. */ io->running = 1; - atomic_add_int(&hmp->io_running_space, io->bytes); - atomic_add_int(&hammer_count_io_running_write, io->bytes); + atomic_add_long(&hmp->io_running_space, io->bytes); + atomic_add_long(&hammer_count_io_running_write, io->bytes); lwkt_gettoken(&hmp->io_token); TAILQ_INSERT_TAIL(&hmp->iorun_list, io, iorun_entry); lwkt_reltoken(&hmp->io_token); @@ -943,7 +943,7 @@ hammer_io_clear_modify(struct hammer_io *io, int inval) if (io->mod_root == &io->hmp->volu_root || io->mod_root == &io->hmp->meta_root) { io->hmp->locked_dirty_space -= io->bytes; - atomic_add_int(&hammer_count_dirtybufspace, -io->bytes); + atomic_add_long(&hammer_count_dirtybufspace, -io->bytes); } RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io); io->mod_root = NULL; @@ -1019,12 +1019,12 @@ hammer_io_set_modlist(struct hammer_io *io) case HAMMER_STRUCTURE_VOLUME: io->mod_root = &hmp->volu_root; hmp->locked_dirty_space += io->bytes; - atomic_add_int(&hammer_count_dirtybufspace, io->bytes); + atomic_add_long(&hammer_count_dirtybufspace, io->bytes); break; case HAMMER_STRUCTURE_META_BUFFER: io->mod_root = &hmp->meta_root; hmp->locked_dirty_space += io->bytes; - atomic_add_int(&hammer_count_dirtybufspace, io->bytes); + atomic_add_long(&hammer_count_dirtybufspace, io->bytes); break; case HAMMER_STRUCTURE_UNDO_BUFFER: io->mod_root = &hmp->undo_root; @@ -1122,8 +1122,8 @@ hammer_io_complete(struct buf *bp) #endif } 
hammer_stats_disk_write += iou->io.bytes; - atomic_add_int(&hammer_count_io_running_write, -iou->io.bytes); - atomic_add_int(&hmp->io_running_space, -iou->io.bytes); + atomic_add_long(&hammer_count_io_running_write, -iou->io.bytes); + atomic_add_long(&hmp->io_running_space, -iou->io.bytes); KKASSERT(hmp->io_running_space >= 0); iou->io.running = 0; @@ -1350,8 +1350,8 @@ hammer_io_checkwrite(struct buf *bp) */ KKASSERT(io->running == 0); io->running = 1; - atomic_add_int(&io->hmp->io_running_space, io->bytes); - atomic_add_int(&hammer_count_io_running_write, io->bytes); + atomic_add_long(&io->hmp->io_running_space, io->bytes); + atomic_add_long(&hammer_count_io_running_write, io->bytes); TAILQ_INSERT_TAIL(&io->hmp->iorun_list, io, iorun_entry); hammer_put_interlock(&io->lock, 1); diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index ccd0ff37b9..62a73c981f 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -94,13 +94,13 @@ int64_t hammer_stats_commits; int64_t hammer_stats_undo; int64_t hammer_stats_redo; -int hammer_count_dirtybufspace; /* global */ +long hammer_count_dirtybufspace; /* global */ int hammer_count_refedbufs; /* global */ int hammer_count_reservations; -int hammer_count_io_running_read; -int hammer_count_io_running_write; +long hammer_count_io_running_read; +long hammer_count_io_running_write; int hammer_count_io_locked; -int hammer_limit_dirtybufspace; /* per-mount */ +long hammer_limit_dirtybufspace; /* per-mount */ int hammer_limit_recs; /* as a whole XXX */ int hammer_limit_inode_recs = 2048; /* per inode */ int hammer_limit_reclaims; @@ -163,7 +163,7 @@ SYSCTL_INT(_vfs_hammer, OID_AUTO, live_dedup, CTLFLAG_RW, SYSCTL_INT(_vfs_hammer, OID_AUTO, tdmux_ticks, CTLFLAG_RW, &hammer_tdmux_ticks, 0, "Hammer tdmux ticks"); -SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_dirtybufspace, CTLFLAG_RW, +SYSCTL_LONG(_vfs_hammer, OID_AUTO, limit_dirtybufspace, CTLFLAG_RW, &hammer_limit_dirtybufspace, 0, ""); 
SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_recs, CTLFLAG_RW, &hammer_limit_recs, 0, ""); @@ -251,17 +251,17 @@ SYSCTL_QUAD(_vfs_hammer, OID_AUTO, live_dedup_bmap_saves, CTLFLAG_RW, &hammer_live_dedup_bmap_saves, 0, "useful physical block lookups"); -SYSCTL_INT(_vfs_hammer, OID_AUTO, count_dirtybufspace, CTLFLAG_RD, +SYSCTL_LONG(_vfs_hammer, OID_AUTO, count_dirtybufspace, CTLFLAG_RD, &hammer_count_dirtybufspace, 0, ""); SYSCTL_INT(_vfs_hammer, OID_AUTO, count_refedbufs, CTLFLAG_RD, &hammer_count_refedbufs, 0, ""); SYSCTL_INT(_vfs_hammer, OID_AUTO, count_reservations, CTLFLAG_RD, &hammer_count_reservations, 0, ""); -SYSCTL_INT(_vfs_hammer, OID_AUTO, count_io_running_read, CTLFLAG_RD, +SYSCTL_LONG(_vfs_hammer, OID_AUTO, count_io_running_read, CTLFLAG_RD, &hammer_count_io_running_read, 0, ""); SYSCTL_INT(_vfs_hammer, OID_AUTO, count_io_locked, CTLFLAG_RD, &hammer_count_io_locked, 0, ""); -SYSCTL_INT(_vfs_hammer, OID_AUTO, count_io_running_write, CTLFLAG_RD, +SYSCTL_LONG(_vfs_hammer, OID_AUTO, count_io_running_write, CTLFLAG_RD, &hammer_count_io_running_write, 0, ""); SYSCTL_QUAD(_vfs_hammer, OID_AUTO, zone_limit, CTLFLAG_RW, &hammer_zone_limit, 0, ""); diff --git a/usr.bin/top/m_dragonfly.c b/usr.bin/top/m_dragonfly.c index f743f78115..99d982f1e2 100644 --- a/usr.bin/top/m_dragonfly.c +++ b/usr.bin/top/m_dragonfly.c @@ -391,7 +391,7 @@ get_system_info(struct system_info *si) static unsigned int swap_delay = 0; static int swapavail = 0; static int swapfree = 0; - static int bufspace = 0; + static long bufspace = 0; if (sysctlbyname("vm.vmstats", &vms, &vms_size, NULL, 0)) err(1, "sysctlbyname: vm.vmstats"); -- 2.41.0