kernel - Upgrade buffer space tracking variables from int to long
authorMatthew Dillon <dillon@apollo.backplane.com>
Sat, 12 Nov 2011 06:20:12 +0000 (22:20 -0800)
committerMatthew Dillon <dillon@apollo.backplane.com>
Sat, 12 Nov 2011 06:22:56 +0000 (22:22 -0800)
* Several bufspace-related buffer cache parameters can now overflow a
  32 bit integer on machines with large amounts (~64G+) of memory.
  Change these to long.

  bufspace, maxbufspace, maxbufmallocspace, lobufspace, hibufspace,
  lorunningspace, hirunningspace, dirtybufspace,
  dirtybufspacehw, runningbufspace, lodirtybufspace, hidirtybufspace.

* Also requires an API change to libkcore/libkinfo which affects top.

lib/libkcore/kcore.h
lib/libkcore/kcore_vfs.c
lib/libkinfo/kinfo.h
lib/libkinfo/kinfo_vfs.c
sys/kern/kern_iosched.c
sys/kern/vfs_bio.c
sys/sys/buf.h
sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_vfsops.c
usr.bin/top/m_dragonfly.c

index 1181e18..151654a 100644 (file)
@@ -93,7 +93,7 @@ int    kcore_get_sched_profhz(struct kcore_data *, int *);
 int     kcore_get_sched_stathz(struct kcore_data *, int *);
 int     kcore_get_tty_tk_nin(struct kcore_data *, uint64_t *);
 int     kcore_get_tty_tk_nout(struct kcore_data *, uint64_t *);
-int     kcore_get_vfs_bufspace(struct kcore_data *, int *);
+int     kcore_get_vfs_bufspace(struct kcore_data *, long *);
 __END_DECLS;
 
 #endif
index e887fc4..78cf045 100644 (file)
@@ -46,7 +46,7 @@
 #include "kcore_private.h"
 
 int
-kcore_get_vfs_bufspace(struct kcore_data *kc, int *bufspace)
+kcore_get_vfs_bufspace(struct kcore_data *kc, long *bufspace)
 {
        static struct nlist nl[] = {
                { "_bufspace", 0, 0, 0, 0},
index dd04a57..fdb87e3 100644 (file)
@@ -67,7 +67,7 @@ int   kinfo_get_tty_tk_nin(uint64_t *);
 int    kinfo_get_tty_tk_nout(uint64_t *);
 
 /* VFS */
-int    kinfo_get_vfs_bufspace(int *);
+int    kinfo_get_vfs_bufspace(long *);
 
 /* Per-CPU accumulators */
 PCPU_STATISTICS_PROT(cputime, struct kinfo_cputime);
index df5857c..03d84ac 100644 (file)
@@ -42,7 +42,7 @@
 #include <stddef.h>
 
 int
-kinfo_get_vfs_bufspace(int *bufspace)
+kinfo_get_vfs_bufspace(long *bufspace)
 {
        size_t len = sizeof(*bufspace);
 
index 145e477..f405a57 100644 (file)
@@ -129,8 +129,8 @@ biosched_done(thread_t td)
 void
 bwillwrite(int bytes)
 {
-       int count;
-       int factor;
+       long count;
+       long factor;
 
        count = bd_heatup();
        if (count > 0) {
index 20598f3..35b6a80 100644 (file)
@@ -119,22 +119,22 @@ vm_page_t bogus_page;
  * These are all static, but make the ones we export globals so we do
  * not need to use compiler magic.
  */
-int bufspace;                  /* locked by buffer_map */
-int maxbufspace;
-static int bufmallocspace;     /* atomic ops */
-int maxbufmallocspace, lobufspace, hibufspace;
+long bufspace;                 /* locked by buffer_map */
+long maxbufspace;
+static long bufmallocspace;    /* atomic ops */
+long maxbufmallocspace, lobufspace, hibufspace;
 static int bufreusecnt, bufdefragcnt, buffreekvacnt;
-static int lorunningspace;
-static int hirunningspace;
+static long lorunningspace;
+static long hirunningspace;
 static int runningbufreq;              /* locked by bufcspin */
-static int dirtybufspace;              /* locked by bufcspin */
+static long dirtybufspace;             /* locked by bufcspin */
 static int dirtybufcount;              /* locked by bufcspin */
-static int dirtybufspacehw;            /* locked by bufcspin */
+static long dirtybufspacehw;           /* locked by bufcspin */
 static int dirtybufcounthw;            /* locked by bufcspin */
-static int runningbufspace;            /* locked by bufcspin */
+static long runningbufspace;           /* locked by bufcspin */
 static int runningbufcount;            /* locked by bufcspin */
-int lodirtybufspace;
-int hidirtybufspace;
+long lodirtybufspace;
+long hidirtybufspace;
 static int getnewbufcalls;
 static int getnewbufrestarts;
 static int recoverbufcalls;
@@ -154,13 +154,13 @@ static u_int lowmempgfails;
 /*
  * Sysctls for operational control of the buffer cache.
  */
-SYSCTL_INT(_vfs, OID_AUTO, lodirtybufspace, CTLFLAG_RW, &lodirtybufspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, lodirtybufspace, CTLFLAG_RW, &lodirtybufspace, 0,
        "Number of dirty buffers to flush before bufdaemon becomes inactive");
-SYSCTL_INT(_vfs, OID_AUTO, hidirtybufspace, CTLFLAG_RW, &hidirtybufspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, hidirtybufspace, CTLFLAG_RW, &hidirtybufspace, 0,
        "High watermark used to trigger explicit flushing of dirty buffers");
-SYSCTL_INT(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW, &lorunningspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW, &lorunningspace, 0,
        "Minimum amount of buffer space required for active I/O");
-SYSCTL_INT(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0,
        "Maximum amount of buffer space to usable for active I/O");
 SYSCTL_UINT(_vfs, OID_AUTO, lowmempgallocs, CTLFLAG_RW, &lowmempgallocs, 0,
        "Page allocations done during periods of very low free memory");
@@ -173,29 +173,29 @@ SYSCTL_UINT(_vfs, OID_AUTO, vm_cycle_point, CTLFLAG_RW, &vm_cycle_point, 0,
  */
 SYSCTL_INT(_vfs, OID_AUTO, nbuf, CTLFLAG_RD, &nbuf, 0,
        "Total number of buffers in buffer cache");
-SYSCTL_INT(_vfs, OID_AUTO, dirtybufspace, CTLFLAG_RD, &dirtybufspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, dirtybufspace, CTLFLAG_RD, &dirtybufspace, 0,
        "Pending bytes of dirty buffers (all)");
-SYSCTL_INT(_vfs, OID_AUTO, dirtybufspacehw, CTLFLAG_RD, &dirtybufspacehw, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, dirtybufspacehw, CTLFLAG_RD, &dirtybufspacehw, 0,
        "Pending bytes of dirty buffers (heavy weight)");
 SYSCTL_INT(_vfs, OID_AUTO, dirtybufcount, CTLFLAG_RD, &dirtybufcount, 0,
        "Pending number of dirty buffers");
 SYSCTL_INT(_vfs, OID_AUTO, dirtybufcounthw, CTLFLAG_RD, &dirtybufcounthw, 0,
        "Pending number of dirty buffers (heavy weight)");
-SYSCTL_INT(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0,
        "I/O bytes currently in progress due to asynchronous writes");
 SYSCTL_INT(_vfs, OID_AUTO, runningbufcount, CTLFLAG_RD, &runningbufcount, 0,
        "I/O buffers currently in progress due to asynchronous writes");
-SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0,
        "Hard limit on maximum amount of memory usable for buffer space");
-SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0,
        "Soft limit on maximum amount of memory usable for buffer space");
-SYSCTL_INT(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0,
        "Minimum amount of memory to reserve for system buffer space");
-SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0,
        "Amount of memory available for buffers");
-SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RD, &maxbufmallocspace,
+SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RD, &maxbufmallocspace,
        0, "Maximum amount of memory reserved for buffers using malloc");
-SYSCTL_INT(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0,
        "Amount of memory left for buffers using malloc-scheme");
 SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RD, &getnewbufcalls, 0,
        "New buffer header acquisition requests");
@@ -255,8 +255,8 @@ bufspacewakeup(void)
 static __inline void
 runningbufwakeup(struct buf *bp)
 {
-       int totalspace;
-       int limit;
+       long totalspace;
+       long limit;
 
        if ((totalspace = bp->b_runningbufspace) != 0) {
                spin_lock(&bufcspin);
@@ -317,7 +317,7 @@ bufcountwakeup(void)
 void
 waitrunningbufspace(void)
 {
-       int limit = hirunningspace * 4 / 6;
+       long limit = hirunningspace * 4 / 6;
 
        if (runningbufspace > limit || runningbufreq) {
                spin_lock(&bufcspin);
@@ -422,9 +422,9 @@ bd_speedup(void)
 int
 bd_heatup(void)
 {
-       int mid1;
-       int mid2;
-       int totalspace;
+       long mid1;
+       long mid2;
+       long totalspace;
 
        mid1 = lodirtybufspace + (hidirtybufspace - lodirtybufspace) / 2;
 
@@ -645,7 +645,7 @@ bufinit(void)
         * this may result in KVM fragmentation which is not handled optimally
         * by the system.
         */
-       maxbufspace = nbuf * BKVASIZE;
+       maxbufspace = (long)nbuf * BKVASIZE;
        hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10);
        lobufspace = hibufspace - MAXBSIZE;
 
@@ -2514,7 +2514,7 @@ SYSINIT(bufdaemon_hw, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST,
 static void
 buf_daemon(void)
 {
-       int limit;
+       long limit;
 
        /*
         * This process needs to be suspended prior to shutdown sync.
@@ -2573,7 +2573,7 @@ buf_daemon(void)
 static void
 buf_daemon_hw(void)
 {
-       int limit;
+       long limit;
 
        /*
         * This process needs to be suspended prior to shutdown sync.
@@ -3306,7 +3306,7 @@ allocbuf(struct buf *bp, int size)
                                } else {
                                        kfree(bp->b_data, M_BIOBUF);
                                        if (bp->b_bufsize) {
-                                               atomic_subtract_int(&bufmallocspace, bp->b_bufsize);
+                                               atomic_subtract_long(&bufmallocspace, bp->b_bufsize);
                                                bufspacewakeup();
                                                bp->b_bufsize = 0;
                                        }
@@ -3334,7 +3334,7 @@ allocbuf(struct buf *bp, int size)
                                bp->b_bufsize = mbsize;
                                bp->b_bcount = size;
                                bp->b_flags |= B_MALLOC;
-                               atomic_add_int(&bufmallocspace, mbsize);
+                               atomic_add_long(&bufmallocspace, mbsize);
                                return 1;
                        }
                        origbuf = NULL;
@@ -3349,8 +3349,8 @@ allocbuf(struct buf *bp, int size)
                                origbufsize = bp->b_bufsize;
                                bp->b_data = bp->b_kvabase;
                                if (bp->b_bufsize) {
-                                       atomic_subtract_int(&bufmallocspace,
-                                                           bp->b_bufsize);
+                                       atomic_subtract_long(&bufmallocspace,
+                                                            bp->b_bufsize);
                                        bufspacewakeup();
                                        bp->b_bufsize = 0;
                                }
index 71877b3..0742325 100644 (file)
@@ -377,7 +377,7 @@ struct cluster_save {
 extern int     nbuf;                   /* The number of buffer headers */
 extern long    maxswzone;              /* Max KVA for swap structures */
 extern long    maxbcache;              /* Max KVA for buffer cache */
-extern int     hidirtybufspace;
+extern long    hidirtybufspace;
 extern int      buf_maxio;              /* nominal maximum I/O for buffer */
 extern struct buf *buf;                        /* The buffer headers. */
 extern char    *buffers;               /* The buffer contents. */
index 1bad1b1..c735a06 100644 (file)
@@ -911,8 +911,8 @@ struct hammer_mount {
        struct hammer_mod_rb_tree data_root;    /* dirty data buffers */
        struct hammer_mod_rb_tree meta_root;    /* dirty meta bufs    */
        struct hammer_mod_rb_tree lose_root;    /* loose buffers      */
-       int     locked_dirty_space;             /* meta/volu count    */
-       int     io_running_space;               /* io_token */
+       long    locked_dirty_space;             /* meta/volu count    */
+       long    io_running_space;               /* io_token */
        int     unused01;
        int     objid_cache_count;
        int     dedup_cache_count;
@@ -1038,13 +1038,13 @@ extern int64_t hammer_stats_inode_flushes;
 extern int64_t hammer_stats_commits;
 extern int64_t hammer_stats_undo;
 extern int64_t hammer_stats_redo;
-extern int hammer_count_dirtybufspace;
+extern long hammer_count_dirtybufspace;
 extern int hammer_count_refedbufs;
 extern int hammer_count_reservations;
-extern int hammer_count_io_running_read;
-extern int hammer_count_io_running_write;
+extern long hammer_count_io_running_read;
+extern long hammer_count_io_running_write;
 extern int hammer_count_io_locked;
-extern int hammer_limit_dirtybufspace;
+extern long hammer_limit_dirtybufspace;
 extern int hammer_limit_recs;
 extern int hammer_limit_inode_recs;
 extern int hammer_limit_reclaims;
index be5f0c3..8960485 100644 (file)
@@ -303,7 +303,7 @@ hammer_io_read(struct vnode *devvp, struct hammer_io *io, int limit)
        int   error;
 
        if ((bp = io->bp) == NULL) {
-               atomic_add_int(&hammer_count_io_running_read, io->bytes);
+               atomic_add_long(&hammer_count_io_running_read, io->bytes);
                if (hammer_cluster_enable && limit > io->bytes) {
                        error = cluster_read(devvp, io->offset + limit,
                                             io->offset, io->bytes,
@@ -314,7 +314,7 @@ hammer_io_read(struct vnode *devvp, struct hammer_io *io, int limit)
                        error = bread(devvp, io->offset, io->bytes, &io->bp);
                }
                hammer_stats_disk_read += io->bytes;
-               atomic_add_int(&hammer_count_io_running_read, -io->bytes);
+               atomic_add_long(&hammer_count_io_running_read, -io->bytes);
 
                /*
                 * The code generally assumes b_ops/b_dep has been set-up,
@@ -737,8 +737,8 @@ hammer_io_flush(struct hammer_io *io, int reclaim)
         *       update io_running_space.
         */
        io->running = 1;
-       atomic_add_int(&hmp->io_running_space, io->bytes);
-       atomic_add_int(&hammer_count_io_running_write, io->bytes);
+       atomic_add_long(&hmp->io_running_space, io->bytes);
+       atomic_add_long(&hammer_count_io_running_write, io->bytes);
        lwkt_gettoken(&hmp->io_token);
        TAILQ_INSERT_TAIL(&hmp->iorun_list, io, iorun_entry);
        lwkt_reltoken(&hmp->io_token);
@@ -943,7 +943,7 @@ hammer_io_clear_modify(struct hammer_io *io, int inval)
        if (io->mod_root == &io->hmp->volu_root ||
            io->mod_root == &io->hmp->meta_root) {
                io->hmp->locked_dirty_space -= io->bytes;
-               atomic_add_int(&hammer_count_dirtybufspace, -io->bytes);
+               atomic_add_long(&hammer_count_dirtybufspace, -io->bytes);
        }
        RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io);
        io->mod_root = NULL;
@@ -1019,12 +1019,12 @@ hammer_io_set_modlist(struct hammer_io *io)
        case HAMMER_STRUCTURE_VOLUME:
                io->mod_root = &hmp->volu_root;
                hmp->locked_dirty_space += io->bytes;
-               atomic_add_int(&hammer_count_dirtybufspace, io->bytes);
+               atomic_add_long(&hammer_count_dirtybufspace, io->bytes);
                break;
        case HAMMER_STRUCTURE_META_BUFFER:
                io->mod_root = &hmp->meta_root;
                hmp->locked_dirty_space += io->bytes;
-               atomic_add_int(&hammer_count_dirtybufspace, io->bytes);
+               atomic_add_long(&hammer_count_dirtybufspace, io->bytes);
                break;
        case HAMMER_STRUCTURE_UNDO_BUFFER:
                io->mod_root = &hmp->undo_root;
@@ -1122,8 +1122,8 @@ hammer_io_complete(struct buf *bp)
 #endif
                }
                hammer_stats_disk_write += iou->io.bytes;
-               atomic_add_int(&hammer_count_io_running_write, -iou->io.bytes);
-               atomic_add_int(&hmp->io_running_space, -iou->io.bytes);
+               atomic_add_long(&hammer_count_io_running_write, -iou->io.bytes);
+               atomic_add_long(&hmp->io_running_space, -iou->io.bytes);
                KKASSERT(hmp->io_running_space >= 0);
                iou->io.running = 0;
 
@@ -1350,8 +1350,8 @@ hammer_io_checkwrite(struct buf *bp)
         */
        KKASSERT(io->running == 0);
        io->running = 1;
-       atomic_add_int(&io->hmp->io_running_space, io->bytes);
-       atomic_add_int(&hammer_count_io_running_write, io->bytes);
+       atomic_add_long(&io->hmp->io_running_space, io->bytes);
+       atomic_add_long(&hammer_count_io_running_write, io->bytes);
        TAILQ_INSERT_TAIL(&io->hmp->iorun_list, io, iorun_entry);
 
        hammer_put_interlock(&io->lock, 1);
index ccd0ff3..62a73c9 100644 (file)
@@ -94,13 +94,13 @@ int64_t hammer_stats_commits;
 int64_t hammer_stats_undo;
 int64_t hammer_stats_redo;
 
-int hammer_count_dirtybufspace;                /* global */
+long hammer_count_dirtybufspace;       /* global */
 int hammer_count_refedbufs;            /* global */
 int hammer_count_reservations;
-int hammer_count_io_running_read;
-int hammer_count_io_running_write;
+long hammer_count_io_running_read;
+long hammer_count_io_running_write;
 int hammer_count_io_locked;
-int hammer_limit_dirtybufspace;                /* per-mount */
+long hammer_limit_dirtybufspace;       /* per-mount */
 int hammer_limit_recs;                 /* as a whole XXX */
 int hammer_limit_inode_recs = 2048;    /* per inode */
 int hammer_limit_reclaims;
@@ -163,7 +163,7 @@ SYSCTL_INT(_vfs_hammer, OID_AUTO, live_dedup, CTLFLAG_RW,
 SYSCTL_INT(_vfs_hammer, OID_AUTO, tdmux_ticks, CTLFLAG_RW,
           &hammer_tdmux_ticks, 0, "Hammer tdmux ticks");
 
-SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_dirtybufspace, CTLFLAG_RW,
+SYSCTL_LONG(_vfs_hammer, OID_AUTO, limit_dirtybufspace, CTLFLAG_RW,
           &hammer_limit_dirtybufspace, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_recs, CTLFLAG_RW,
           &hammer_limit_recs, 0, "");
@@ -251,17 +251,17 @@ SYSCTL_QUAD(_vfs_hammer, OID_AUTO, live_dedup_bmap_saves, CTLFLAG_RW,
            &hammer_live_dedup_bmap_saves, 0,
            "useful physical block lookups");
 
-SYSCTL_INT(_vfs_hammer, OID_AUTO, count_dirtybufspace, CTLFLAG_RD,
+SYSCTL_LONG(_vfs_hammer, OID_AUTO, count_dirtybufspace, CTLFLAG_RD,
           &hammer_count_dirtybufspace, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_refedbufs, CTLFLAG_RD,
           &hammer_count_refedbufs, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_reservations, CTLFLAG_RD,
           &hammer_count_reservations, 0, "");
-SYSCTL_INT(_vfs_hammer, OID_AUTO, count_io_running_read, CTLFLAG_RD,
+SYSCTL_LONG(_vfs_hammer, OID_AUTO, count_io_running_read, CTLFLAG_RD,
           &hammer_count_io_running_read, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_io_locked, CTLFLAG_RD,
           &hammer_count_io_locked, 0, "");
-SYSCTL_INT(_vfs_hammer, OID_AUTO, count_io_running_write, CTLFLAG_RD,
+SYSCTL_LONG(_vfs_hammer, OID_AUTO, count_io_running_write, CTLFLAG_RD,
           &hammer_count_io_running_write, 0, "");
 SYSCTL_QUAD(_vfs_hammer, OID_AUTO, zone_limit, CTLFLAG_RW,
           &hammer_zone_limit, 0, "");
index f743f78..99d982f 100644 (file)
@@ -391,7 +391,7 @@ get_system_info(struct system_info *si)
                static unsigned int swap_delay = 0;
                static int swapavail = 0;
                static int swapfree = 0;
-               static int bufspace = 0;
+               static long bufspace = 0;
 
                if (sysctlbyname("vm.vmstats", &vms, &vms_size, NULL, 0))
                        err(1, "sysctlbyname: vm.vmstats");