From e0fb398bfbef1fb6d12dfb8308cdc83ce663cbc2 Mon Sep 17 00:00:00 2001 From: Tim Date: Fri, 7 Oct 2011 11:27:15 -0700 Subject: [PATCH] TRIM support Signed-off-by: Samuel J. Greear --- etc/rc.d/fsck | 2 +- etc/rc.d/swap1 | 3 +- sbin/fdisk/fdisk.8 | 9 ++ sbin/fdisk/fdisk.c | 69 ++++++++- sbin/hammer/hammer_util.h | 1 + sbin/mount/mntopts.h | 1 + sbin/mount/mount.8 | 4 + sbin/mount/mount_ufs.c | 23 +++ sbin/newfs/mkfs.c | 29 ++++ sbin/newfs/newfs.8 | 6 +- sbin/newfs/newfs.c | 37 ++++- sbin/newfs_hammer/newfs_hammer.8 | 6 +- sbin/newfs_hammer/newfs_hammer.c | 61 +++++++- sbin/newfs_hammer/newfs_hammer.h | 1 + sbin/swapon/swapon.8 | 10 +- sbin/swapon/swapon.c | 114 +++++++++++++-- sys/bus/cam/cam_ccb.h | 2 + sys/bus/cam/cam_xpt.c | 1 + sys/bus/cam/scsi/scsi_all.h | 1 + sys/bus/cam/scsi/scsi_da.c | 239 ++++++++++++++++++++++++++++++- sys/dev/disk/ahci/ahci_cam.c | 52 +++++++ sys/dev/disk/ahci/atascsi.h | 14 +- sys/kern/vfs_bio.c | 2 +- sys/kern/vfs_syscalls.c | 4 +- sys/sys/buf.h | 1 + sys/sys/disk.h | 1 + sys/sys/ioctl_compat.h | 1 + sys/sys/mount.h | 3 +- sys/vfs/ufs/ffs_alloc.c | 107 ++++++++++++-- sys/vfs/ufs/ffs_vfsops.c | 1 + 30 files changed, 764 insertions(+), 41 deletions(-) diff --git a/etc/rc.d/fsck b/etc/rc.d/fsck index 9e119bf0cf..b48878bbc5 100644 --- a/etc/rc.d/fsck +++ b/etc/rc.d/fsck @@ -6,7 +6,7 @@ # # PROVIDE: fsck -# REQUIRE: localswap +# REQUIRE: disks . /etc/rc.subr diff --git a/etc/rc.d/swap1 b/etc/rc.d/swap1 index b88b9aa761..0f123696ca 100644 --- a/etc/rc.d/swap1 +++ b/etc/rc.d/swap1 @@ -6,7 +6,8 @@ # # PROVIDE: localswap -# REQUIRE: disks +# REQUIRE: savecore +# BEFORE: SERVERS # KEYWORD: shutdown . /etc/rc.subr diff --git a/sbin/fdisk/fdisk.8 b/sbin/fdisk/fdisk.8 index 18e7882ccc..c2930debec 100644 --- a/sbin/fdisk/fdisk.8 +++ b/sbin/fdisk/fdisk.8 @@ -89,6 +89,15 @@ would otherwise wrap. This typically causes BIOSes to properly detect that the disk should be put in Large mode. This option may be needed on very old PCs. 
+.It Fl E +Use TRIM to erase the device/partition before creating the file system. The +underlying device must have the Trim sysctl enabled. Only devices that +support TRIM will have such a sysctl option (kern.cam.da.X.trim_enabled). For +use with the +.Fl I +or +.Fl u +option. .It Fl f Ar configfile Set slice values using the file .Ar configfile . diff --git a/sbin/fdisk/fdisk.c b/sbin/fdisk/fdisk.c index e28a066b3f..2ce0c6f7d2 100644 --- a/sbin/fdisk/fdisk.c +++ b/sbin/fdisk/fdisk.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include #include #include @@ -120,6 +122,7 @@ typedef struct cmd { static int B_flag = 0; /* replace boot code */ static int C_flag = 0; /* use wrapped values for CHS */ +static int E_flag = 0; /* Erase through TRIM */ static int I_flag = 0; /* use entire disk for DragonFly */ static int a_flag = 0; /* set active partition */ static char *b_flag = NULL; /* path to boot code */ @@ -235,6 +238,7 @@ static void change_code(); static void get_params_to_use(); static void dos(struct dos_partition *partp); static int open_disk(int u_flag); +static void erase_partition(int i); static ssize_t read_disk(off_t sector, void *buf); static ssize_t write_disk(off_t sector, void *buf); static int get_params(); @@ -258,7 +262,7 @@ main(int argc, char *argv[]) { int c, i; - while ((c = getopt(argc, argv, "BCIab:f:p:istuv1234")) != -1) + while ((c = getopt(argc, argv, "BCEIab:f:p:istuv1234")) != -1) switch (c) { case 'B': B_flag = 1; @@ -266,6 +270,9 @@ main(int argc, char *argv[]) case 'C': C_flag = 1; break; + case 'E': + E_flag = 1; + break; case 'I': I_flag = 1; break; @@ -384,6 +391,12 @@ main(int argc, char *argv[]) dos(partp); if (v_flag) print_s0(-1); + + if (E_flag) { + /* Trim now if we're using the entire device */ + erase_partition(0); + } + if (!t_flag) write_s0(); exit(0); @@ -444,8 +457,20 @@ main(int argc, char *argv[]) } print_s0(-1); if (!t_flag) { - if (ok("Should we write new partition table?")) + if (ok("Should we 
write new partition table?")) { + if (E_flag && u_flag) { + /* + * Trim now because we've committed to + * updating the partition. + */ + if (partition == -1) + for (i = 0; i < NDOSPART; i++) + erase_partition(i); + else + erase_partition(partition); + } write_s0(); + } } else { @@ -762,6 +787,46 @@ dos(struct dos_partition *partp) int fd; +static void +erase_partition(int i) +{ + struct dos_partition *partp; + off_t ioarg[2]; + + char sysctl_name[64]; + int trim_enabled = 0; + size_t olen = sizeof(trim_enabled); + char *dev_name = strdup(disk); + + dev_name = strtok(dev_name + strlen("/dev/da"),"s"); + sprintf(sysctl_name, "kern.cam.da.%s.trim_enabled", dev_name); + if (sysctlbyname(sysctl_name, &trim_enabled, &olen, NULL, 0) < 0 && + errno == ENOENT) { + printf("Device:%s does not support the TRIM command\n", disk); + usage(); + } + if(!trim_enabled) { + printf("Erase device option selected, but sysctl (%s) " + "is not enabled\n",sysctl_name); + usage(); + } + partp = ((struct dos_partition *) &mboot.parts) + i; + printf("erase sectors:%u %u\n", + partp->dp_start, + partp->dp_size); + + /* Trim the Device */ + ioarg[0] = partp->dp_start; + ioarg[0] *=secsize; + ioarg[1] = partp->dp_size; + ioarg[1] *=secsize; + + if (ioctl(fd, IOCTLTRIM, ioarg) < 0) { + printf("Device trim failed\n"); + usage (); + } +} + /* Getting device status */ static int diff --git a/sbin/hammer/hammer_util.h b/sbin/hammer/hammer_util.h index 5c5845f59e..678a189c7a 100644 --- a/sbin/hammer/hammer_util.h +++ b/sbin/hammer/hammer_util.h @@ -80,6 +80,7 @@ struct volume_info { char *name; int fd; off_t size; + off_t device_offset; const char *type; struct hammer_volume_ondisk *ondisk; diff --git a/sbin/mount/mntopts.h b/sbin/mount/mntopts.h index 5a6215ae1e..b7533afa79 100644 --- a/sbin/mount/mntopts.h +++ b/sbin/mount/mntopts.h @@ -52,6 +52,7 @@ struct mntopt { #define MOPT_NOEXEC { "exec", 1, MNT_NOEXEC, 0 } #define MOPT_NOSUID { "suid", 1, MNT_NOSUID, 0 } #define MOPT_NOSYMFOLLOW { "symfollow", 1, 
MNT_NOSYMFOLLOW, 0 } +#define MOPT_TRIM { "trim", 0, MNT_TRIM, 0 } #define MOPT_RDONLY { "rdonly", 0, MNT_RDONLY, 0 } #define MOPT_SYNC { "sync", 0, MNT_SYNCHRONOUS, 0 } #define MOPT_UNION { "union", 0, MNT_UNION, 0 } diff --git a/sbin/mount/mount.8 b/sbin/mount/mount.8 index 438fc771ee..fba3eab86e 100644 --- a/sbin/mount/mount.8 +++ b/sbin/mount/mount.8 @@ -193,6 +193,10 @@ mount the file system read-only (even the super-user may not write it). All .Tn I/O to the file system should be done synchronously. +.It Cm trim +If the device supports trim (kern.cam.da.X.trim_enabled exists) and is set, +the file system will perform online trim for corresponding block deletions. +Currently UFS only supports this feature. .It Cm suiddir A directory on the mounted filesystem will respond to the SUID bit being set, by setting the owner of any new files to be the same diff --git a/sbin/mount/mount_ufs.c b/sbin/mount/mount_ufs.c index 1af7efabb3..fc0d88b731 100644 --- a/sbin/mount/mount_ufs.c +++ b/sbin/mount/mount_ufs.c @@ -38,11 +38,13 @@ #include #include +#include #include #include #include #include +#include #include #include @@ -59,6 +61,7 @@ static struct mntopt mopts[] = { MOPT_SYNC, MOPT_UPDATE, MOPT_IGNORE, + MOPT_TRIM, MOPT_NULL }; @@ -98,6 +101,26 @@ mount_ufs(int argc, const char **argv) else args.export.ex_flags = 0; + if (mntflags & MNT_TRIM){ + char sysctl_name[64]; + int trim_enabled = 0; + size_t olen = sizeof(trim_enabled); + char *dev_name = strdup(args.fspec); + dev_name = strtok(dev_name + strlen("/dev/da"),"s"); + sprintf(sysctl_name, "kern.cam.da.%s.trim_enabled", dev_name); + sysctlbyname(sysctl_name, &trim_enabled, &olen, NULL, 0); + if(errno == ENOENT) { + printf("Device:%s does not support the TRIM command\n", + args.fspec); + ufs_usage(); + } + if(!trim_enabled) { + printf("Online TRIM selected, but sysctl (%s) " + "is not enabled\n",sysctl_name); + ufs_usage(); + } + } + error = getvfsbyname("ufs", &vfc); if (error && vfsisloadable("ufs")) { if 
(vfsload("ufs")) { diff --git a/sbin/newfs/mkfs.c b/sbin/newfs/mkfs.c index c00768904f..a08f9f6065 100644 --- a/sbin/newfs/mkfs.c +++ b/sbin/newfs/mkfs.c @@ -38,6 +38,7 @@ #include "defs.h" #include +#include /* * make file system for cylinder-group style file systems @@ -70,6 +71,8 @@ extern int Lflag; /* add a volume label */ extern int Nflag; /* run mkfs without writing file system */ extern int Oflag; /* format as an 4.3BSD file system */ extern int Uflag; /* enable soft updates for file system */ +extern int Eflag; /* erase contents using TRIM */ +extern uint64_t slice_offset; /* Pysical device slice offset */ extern u_long fssize; /* file system size */ extern int ntracks; /* # tracks/cylinder */ extern int nsectors; /* # sectors/track */ @@ -136,6 +139,7 @@ void parentready(int); void rdfs(daddr_t, int, char *); void setblock(struct fs *, unsigned char *, int); void started(int); +void erfs(off_t, off_t); void wtfs(daddr_t, int, char *); void wtfsflush(void); @@ -236,6 +240,7 @@ mkfs(char *fsys, int fi, int fo, const char *mfscopy) sblock.fs_flags |= FS_DOSOFTDEP; if (Lflag) strlcpy(sblock.fs_volname, volumelabel, MAXVOLLEN); + /* * Validate the given file system size. * Verify that its last block can actually be accessed. @@ -677,6 +682,16 @@ next: sblock.fs_flags & FS_DOSOFTDEP ? " SOFTUPDATES" : ""); #undef B2MBFACTOR } + + if (Eflag && !Nflag) { + printf("Erasing sectors [%ld --- %ld]\n", + (SBOFF+ slice_offset)/sectorsize, + fsbtodb(&sblock,sblock.fs_size) - + ((SBOFF + slice_offset)/ sectorsize) - 1); + erfs(SBOFF+ slice_offset, (fsbtodb(&sblock,sblock.fs_size) - + ((SBOFF + slice_offset)/ sectorsize) - 1) * + (unsigned long long)sectorsize); + } /* * Now build the cylinders group blocks and * then print out indices of cylinder groups. 
@@ -1245,6 +1260,20 @@ wtfsflush(void) } } +/* + * Issue ioctl to erase range of sectors using TRIM + */ +void +erfs(off_t byte_start, off_t size) +{ + off_t ioarg[2]; + ioarg[0] = byte_start; + ioarg[1] = size; + if (ioctl(fsi, IOCTLTRIM, ioarg) < 0) { + err(37, "Device trim failed"); + } +} + /* * write a block to the file system */ diff --git a/sbin/newfs/newfs.8 b/sbin/newfs/newfs.8 index 693eb66ee0..cee48f3196 100644 --- a/sbin/newfs/newfs.8 +++ b/sbin/newfs/newfs.8 @@ -43,7 +43,7 @@ .Sh SYNOPSIS .Nm .Op Fl L Ar volname -.Op Fl NCOU +.Op Fl NCOURE .Op Fl S Ar sector-size .Op Fl T Ar disktype .Op Fl a Ar maxcontig @@ -172,6 +172,10 @@ instead of trying to get geometry information from the storage device. .It Fl U Enables soft updates on the new filesystem. +.It Fl E +Use TRIM to erase the device's data before creating the file system. The +underlying device must have the Trim sysctl enabled. Only devices that support +TRIM will have such a sysctl option (kern.cam.da.X.trim_enabled). .It Fl a Ar maxcontig Specify the maximum number of contiguous blocks that will be laid out before forcing a rotational delay (see the diff --git a/sbin/newfs/newfs.c b/sbin/newfs/newfs.c index 714b750b9c..9be2947a8c 100644 --- a/sbin/newfs/newfs.c +++ b/sbin/newfs/newfs.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -163,6 +164,8 @@ int Nflag; /* run without writing file system */ int Oflag; /* format as an 4.3BSD file system */ int Cflag; /* copy underlying filesystem (mfs only) */ int Uflag; /* enable soft updates for file system */ +int Eflag; /* erase contents using TRIM */ +uint64_t slice_offset; /* Physical device slice offset */ u_long fssize; /* file system size */ int ntracks = NTRACKS; /* # tracks/cylinder */ int nsectors = NSECTORS; /* # sectors/track */ @@ -237,9 +240,12 @@ main(int argc, char **argv) opstring = mfs ? 
"L:NCF:T:Ua:b:c:d:e:f:g:h:i:m:o:s:v" : - "L:NOS:T:Ua:b:c:d:e:f:g:h:i:k:l:m:n:o:p:r:s:t:u:vx:"; + "L:NREOS:T:Ua:b:c:d:e:f:g:h:i:k:l:m:n:o:p:r:s:t:u:vx:"; while ((ch = getopt(argc, argv, opstring)) != -1) { switch (ch) { + case 'E': + Eflag = 1; + break; case 'L': volumelabel = optarg; i = -1; @@ -425,6 +431,30 @@ main(int argc, char **argv) if (stat(special, &st) < 0 && special[0] && special[0] != '/') asprintf(&special, "/dev/%s", special); + if (Eflag) { + char sysctl_name[64]; + int trim_enabled = 0; + size_t olen = sizeof(trim_enabled); + char *dev_name = strdup(special); + + dev_name = strtok(dev_name + strlen("/dev/da"),"s"); + sprintf(sysctl_name, "kern.cam.da.%s.trim_enabled", + dev_name); + + if (sysctlbyname(sysctl_name, &trim_enabled, &olen, + NULL, 0) < 0 && + errno == ENOENT) { + printf("Device:%s does not support the TRIM command\n", + special); + usage(); + } + if(!trim_enabled) { + printf("Erase device option selected, but sysctl (%s) " + "is not enabled\n",sysctl_name); + usage(); + + } + } if (Nflag) { fso = -1; } else { @@ -505,6 +535,7 @@ main(int argc, char **argv) /* geom.d_ncylinders not used */ geom.d_media_blocks = pinfo.media_blocks; geom.d_media_size = pinfo.media_size; + slice_offset = pinfo.media_offset; } if (geom.d_media_blocks == 0 || geom.d_media_size == 0) { fatal("%s: is unavailable", argv[0]); @@ -698,7 +729,7 @@ fatal(const char *fmt, ...) 
/*NOTREACHED*/ } -static void +void usage(void) { if (mfs) { @@ -716,10 +747,12 @@ usage(void) #endif fprintf(stderr, "where fsoptions are:\n"); fprintf(stderr, "\t-C (mfs) Copy the underlying filesystem to the MFS mount\n"); + fprintf(stderr, "\t-E erase file system contents using TRIM\n"); fprintf(stderr, "\t-L volume name\n"); fprintf(stderr, "\t-N do not create file system, just print out parameters\n"); fprintf(stderr, "\t-O create a 4.3BSD format filesystem\n"); + fprintf(stderr, "\t-R enable TRIM\n"); fprintf(stderr, "\t-S sector size\n"); #ifdef COMPAT fprintf(stderr, "\t-T disktype\n"); diff --git a/sbin/newfs_hammer/newfs_hammer.8 b/sbin/newfs_hammer/newfs_hammer.8 index 4cc049d0e8..17894262e9 100644 --- a/sbin/newfs_hammer/newfs_hammer.8 +++ b/sbin/newfs_hammer/newfs_hammer.8 @@ -40,7 +40,7 @@ .Sh SYNOPSIS .Nm .Fl L Ar label -.Op Fl f +.Op Fl fE .Op Fl b Ar bootsize .Op Fl m Ar savesize .Op Fl u Ar undosize @@ -119,6 +119,10 @@ This is needed for the creation of a file system less than 10GB size or with less than 500MB UNDO/REDO FIFO. This should not be used under normal circumstances. +.It Fl E +Use TRIM to erase the device's data before creating the file system. The +underlying device must have the Trim sysctl enabled. Only devices that support +TRIM will have such a sysctl option (kern.cam.da.X.trim_enabled). 
.It Fl m Ar savesize Specify a fixed area which .Nm HAMMER diff --git a/sbin/newfs_hammer/newfs_hammer.c b/sbin/newfs_hammer/newfs_hammer.c index 19146b34c5..87f854417a 100644 --- a/sbin/newfs_hammer/newfs_hammer.c +++ b/sbin/newfs_hammer/newfs_hammer.c @@ -38,6 +38,7 @@ static int64_t getsize(const char *str, int64_t minval, int64_t maxval, int pw); static const char *sizetostr(off_t size); +static void trim_volume(struct volume_info *vol); static void check_volume(struct volume_info *vol); static void format_volume(struct volume_info *vol, int nvols,const char *label, off_t total_size); @@ -47,6 +48,7 @@ static void usage(void); static int ForceOpt = 0; static int HammerVersion = -1; +static int Eflag = 0; #define GIG (1024LL*1024*1024) @@ -82,11 +84,14 @@ main(int ac, char **av) /* * Parse arguments */ - while ((ch = getopt(ac, av, "fL:b:m:u:V:")) != -1) { + while ((ch = getopt(ac, av, "fEL:b:m:u:V:")) != -1) { switch(ch) { case 'f': ForceOpt = 1; break; + case 'E': + Eflag = 1; + break; case 'L': label = optarg; break; @@ -189,6 +194,30 @@ main(int ac, char **av) * its remaining fields. 
*/ check_volume(vol); + if (Eflag) { + char sysctl_name[64]; + int trim_enabled = 0; + size_t olen = sizeof(trim_enabled); + char *dev_name = strdup(vol->name); + dev_name = strtok(dev_name + strlen("/dev/da"),"s"); + + sprintf(sysctl_name, "kern.cam.da.%s.trim_enabled", + dev_name); + errno=0; + sysctlbyname(sysctl_name, &trim_enabled, &olen, NULL, 0); + if(errno == ENOENT) { + printf("Device:%s (%s) does not support the " + "TRIM command\n", vol->name,sysctl_name); + usage(); + } + if(!trim_enabled) { + printf("Erase device option selected, but " + "sysctl (%s) is not enabled\n", sysctl_name); + usage(); + + } + trim_volume(vol); + } total += vol->size; } @@ -273,7 +302,7 @@ void usage(void) { fprintf(stderr, - "usage: newfs_hammer -L label [-f] [-b bootsize] [-m savesize] [-u undosize]\n" + "usage: newfs_hammer -L label [-fE] [-b bootsize] [-m savesize] [-u undosize]\n" " [-V version] special ...\n" ); exit(1); @@ -391,6 +420,29 @@ nowtime(void) return(xtime); } +/* + * TRIM the volume, but only if the backing store is a DEVICE + */ +static +void +trim_volume(struct volume_info *vol) +{ + if (strncmp(vol->type, "DEVICE", sizeof(vol->type)) == 0) { + off_t ioarg[2]; + + /* 1MB offset to prevent destroying disk-reserved area */ + ioarg[0] = vol->device_offset; + ioarg[1] = vol->size; + printf("Trimming Device:%s, sectors (%llu -%llu)\n",vol->name, + (unsigned long long)ioarg[0]/512, + (unsigned long long)ioarg[1]/512); + if (ioctl(vol->fd, IOCTLTRIM, ioarg) < 0) { + printf("Device trim failed\n"); + usage (); + } + } +} + /* * Check basic volume characteristics. HAMMER filesystems use a minimum * of a 16KB filesystem buffer size. 
@@ -416,6 +468,10 @@ check_volume(struct volume_info *vol) err(1, "Unable to stat %s", vol->name); vol->size = st.st_size; vol->type = "REGFILE"; + + if (Eflag) + errx(1, "Cannot TRIM regular file %s", vol->name); + } else { /* * When formatting a block device as a HAMMER volume the @@ -433,6 +489,7 @@ check_volume(struct volume_info *vol) } vol->size = pinfo.media_size; + vol->device_offset = pinfo.media_offset; vol->type = "DEVICE"; } printf("Volume %d %s %-15s size %s\n", diff --git a/sbin/newfs_hammer/newfs_hammer.h b/sbin/newfs_hammer/newfs_hammer.h index 271e4fd674..c134bf376a 100644 --- a/sbin/newfs_hammer/newfs_hammer.h +++ b/sbin/newfs_hammer/newfs_hammer.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include diff --git a/sbin/swapon/swapon.8 b/sbin/swapon/swapon.8 index 9bf233865a..181a861780 100644 --- a/sbin/swapon/swapon.8 +++ b/sbin/swapon/swapon.8 @@ -40,10 +40,10 @@ .Nm swapon , swapoff , swapctl .Nd "specify devices for paging and swapping" .Sh SYNOPSIS -.Nm swapon Fl aq | Ar +.Nm swapon Fl aeq | Ar .Nm swapoff Fl aq | Ar .Nm swapctl -.Op Fl AghklmsU +.Op Fl AeghklmsU .Oo .Fl a Ar | @@ -78,6 +78,10 @@ If the .Fl q option is used informational messages will not be written to standard output when a swap device is added. +If the +.Fl e +option is used, the device will be trimmed if +it supports trim and the trim_enabled sysctl is on. .Pp The .Nm swapoff @@ -154,6 +158,8 @@ Output values in megabytes. List the devices making up system swap. .It Fl s Print a summary line for system swap. +.It Fl e +Attempts to Trim the device if -[Aa] is used. 
.Pp The .Ev BLOCKSIZE diff --git a/sbin/swapon/swapon.c b/sbin/swapon/swapon.c index 7dbfcf9aae..3a119e0e50 100644 --- a/sbin/swapon/swapon.c +++ b/sbin/swapon/swapon.c @@ -38,6 +38,8 @@ #include #include #include +#include +#include #include #include @@ -51,7 +53,7 @@ #include static void usage(void); -static int swap_on_off(char *name, int doingall); +static int swap_on_off(char *name, int doingall, int trim); static void swaplist(int lflag, int sflag, int hflag); enum { SWAPON, SWAPOFF, SWAPCTL } orig_prog, which_prog = SWAPCTL; @@ -63,7 +65,7 @@ main(int argc, char **argv) char *ptr; int ret; int ch; - int doall, sflag, lflag, hflag, qflag; + int doall, sflag, lflag, hflag, qflag, eflag; if ((ptr = strrchr(argv[0], '/')) == NULL) ptr = argv[0]; @@ -73,8 +75,8 @@ main(int argc, char **argv) which_prog = SWAPOFF; orig_prog = which_prog; - sflag = lflag = hflag = qflag = doall = 0; - while ((ch = getopt(argc, argv, "AadghklmqsU")) != -1) { + sflag = lflag = hflag = qflag = doall = eflag = 0; + while ((ch = getopt(argc, argv, "AadeghklmqsU")) != -1) { switch((char)ch) { case 'A': if (which_prog == SWAPCTL) { @@ -96,6 +98,9 @@ main(int argc, char **argv) else usage(); break; + case 'e': + eflag = 1; + break; case 'g': hflag = 'G'; break; @@ -141,7 +146,7 @@ main(int argc, char **argv) continue; if (strstr(fsp->fs_mntops, "noauto")) continue; - if (swap_on_off(fsp->fs_spec, 1)) { + if (swap_on_off(fsp->fs_spec, 1, eflag)) { ret = 1; } else { if (!qflag) { @@ -156,7 +161,7 @@ main(int argc, char **argv) usage(); } for (; *argv; ++argv) { - if (swap_on_off(getdevpath(*argv, 0), 0)) { + if (swap_on_off(getdevpath(*argv, 0), 0, eflag)) { ret = 1; } else if (orig_prog == SWAPCTL) { printf("%s: %sing %s as swap device\n", @@ -174,9 +179,100 @@ main(int argc, char **argv) exit(ret); } +/* + * TRIM the device + */ +static +void +trim_volume(char * name) +{ + struct partinfo pinfo; + int fd,i,n; + size_t bytes = 0,ksize; + char *xswbuf; + struct xswdev *xsw; + + + /* + * 
Determine if this device is already being used by swap without + * calling swapon(). + */ + if ((sysctlbyname("vm.swap_info_array", NULL, &bytes, NULL, 0) < 0) || + bytes == 0) { + err(1, "sysctlbyname()"); + } + + xswbuf = malloc(bytes); + if ((sysctlbyname("vm.swap_info_array", xswbuf, &bytes, NULL, 0) < 0) || + bytes == 0) { + free(xswbuf); + err(1, "sysctlbyname()"); + } + + ksize = ((struct xswdev *)xswbuf)->xsw_size; + n = (int)(bytes / ksize); + for (i = 0; i < n; ++i) { + xsw = (void *)((char *)xswbuf + i * ksize); + + if (xsw->xsw_dev == NODEV ) + continue; + if(!strcmp(devname(xsw->xsw_dev, S_IFCHR), + name + strlen("/dev/"))) { + warnx("%s: device already a swap device", name); + exit(1); + } + } + + /* + * Get the size and offset of this partition/device + */ + fd = open(name, O_RDWR); + if (fd < 0) + err(1, "Unable to open %s R+W", name); + if (ioctl(fd, DIOCGPART, &pinfo) < 0) { + printf("Cannot trim regular file\n"); + usage (); + } + off_t ioarg[2]; + + /*Trim the Device*/ + ioarg[0] = pinfo.media_offset; + ioarg[1] = pinfo.media_size; + printf("Trimming Device:%s, sectors (%llu -%llu)\n",name, + (unsigned long long)ioarg[0]/512, + (unsigned long long)ioarg[1]/512); + if (ioctl(fd, IOCTLTRIM, ioarg) < 0) { + printf("Device trim failed\n"); + usage (); + } + close(fd); +} + static int -swap_on_off(char *name, int doingall) +swap_on_off(char *name, int doingall, int trim) { + if (which_prog == SWAPON && trim){ + char sysctl_name[64]; + int trim_enabled = 0; + size_t olen = sizeof(trim_enabled); + char *dev_name = strdup(name); + dev_name = strtok(dev_name + strlen("/dev/da"),"s"); + sprintf(sysctl_name, "kern.cam.da.%s.trim_enabled", dev_name); + if (sysctlbyname(sysctl_name, &trim_enabled, &olen, NULL, 0) < 0 && + errno == ENOENT) { + printf("Device:%s does not support the TRIM command\n", + name); + usage(); + } + if(!trim_enabled) { + printf("Erase device option selected, but sysctl (%s) " + "is not enabled\n",sysctl_name); + usage(); + } + + 
trim_volume(name); + + } if ((which_prog == SWAPOFF ? swapoff(name) : swapon(name)) == -1) { switch(errno) { case EBUSY: @@ -205,10 +301,10 @@ usage(void) switch (orig_prog) { case SWAPON: case SWAPOFF: - fprintf(stderr, "-aq | file ...\n"); + fprintf(stderr, "-aeq | file ...\n"); break; case SWAPCTL: - fprintf(stderr, "[-AghklmsU] [-a file ... | -d file ...]\n"); + fprintf(stderr, "[-AeghklmsU] [-a file ... | -d file ...]\n"); break; } exit(1); diff --git a/sys/bus/cam/cam_ccb.h b/sys/bus/cam/cam_ccb.h index 71e1b3daf6..c19d5ca7de 100644 --- a/sys/bus/cam/cam_ccb.h +++ b/sys/bus/cam/cam_ccb.h @@ -188,6 +188,8 @@ typedef enum { /* Notify Host Target driver of event */ XPT_NOTIFY_ACK = 0x35, /* Acknowledgement of event */ + XPT_TRIM = 0x36 | XPT_FC_DEV_QUEUED, + /* TRIM */ /* Vendor Unique codes: 0x80->0x8F */ XPT_VUNIQUE = 0x80 diff --git a/sys/bus/cam/cam_xpt.c b/sys/bus/cam/cam_xpt.c index 92b4e3799c..14813ce881 100644 --- a/sys/bus/cam/cam_xpt.c +++ b/sys/bus/cam/cam_xpt.c @@ -2962,6 +2962,7 @@ xpt_action(union ccb *start_ccb) switch (start_ccb->ccb_h.func_code) { case XPT_SCSI_IO: + case XPT_TRIM: { struct cam_ed *device; #ifdef CAMDEBUG diff --git a/sys/bus/cam/scsi/scsi_all.h b/sys/bus/cam/scsi/scsi_all.h index 27bc17c638..c722f31d71 100644 --- a/sys/bus/cam/scsi/scsi_all.h +++ b/sys/bus/cam/scsi/scsi_all.h @@ -600,6 +600,7 @@ struct ata_pass_16 { #define WRITE_BUFFER 0x3b #define READ_BUFFER 0x3c #define CHANGE_DEFINITION 0x40 +#define TRIM 0x42 #define LOG_SELECT 0x4c #define LOG_SENSE 0x4d #define MODE_SELECT_10 0x55 diff --git a/sys/bus/cam/scsi/scsi_da.c b/sys/bus/cam/scsi/scsi_da.c index 3ac2cba36d..574c8797b0 100644 --- a/sys/bus/cam/scsi/scsi_da.c +++ b/sys/bus/cam/scsi/scsi_da.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -96,7 +97,8 @@ typedef enum { DA_FLAG_OPEN = 0x100, DA_FLAG_SCTX_INIT = 0x200, DA_FLAG_RD_LIMIT = 0x400, - DA_FLAG_WR_LIMIT = 0x800 + DA_FLAG_WR_LIMIT = 0x800, + DA_FLAG_CAN_TRIM = 0x1000 } da_flags; 
typedef enum { @@ -112,6 +114,7 @@ typedef enum { DA_CCB_BUFFER_IO = 0x03, DA_CCB_WAITING = 0x04, DA_CCB_DUMP = 0x05, + DA_CCB_TRIM = 0x06, DA_CCB_TYPE_MASK = 0x0F, DA_CCB_RETRY_UA = 0x10 } da_ccb_state; @@ -128,9 +131,17 @@ struct disk_params { u_int64_t sectors; /* total number sectors */ }; +#define TRIM_MAX_BLOCKS 8 +#define TRIM_MAX_RANGES TRIM_MAX_BLOCKS * 64 +struct trim_request { + uint8_t data[TRIM_MAX_RANGES * 8]; + struct bio *bios[TRIM_MAX_RANGES]; +}; + struct da_softc { struct bio_queue_head bio_queue_rd; struct bio_queue_head bio_queue_wr; + struct bio_queue_head bio_queue_trim; struct devstat device_stats; SLIST_ENTRY(da_softc) links; LIST_HEAD(, ccb_hdr) pending_ccbs; @@ -141,6 +152,9 @@ struct da_softc { int ordered_tag_count; int outstanding_cmds_rd; int outstanding_cmds_wr; + int trim_max_ranges; + int trim_running; + int trim_enabled; struct disk_params params; struct disk disk; union ccb saved_ccb; @@ -148,6 +162,7 @@ struct da_softc { struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; struct callout sendordered_c; + struct trim_request trim_req; }; struct da_quirk_entry { @@ -323,6 +338,7 @@ static d_open_t daopen; static d_close_t daclose; static d_strategy_t dastrategy; static d_dump_t dadump; +static d_ioctl_t daioctl; static periph_init_t dainit; static void daasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg); @@ -404,13 +420,93 @@ static struct dev_ops da_ops = { .d_read = physread, .d_write = physwrite, .d_strategy = dastrategy, - .d_dump = dadump + .d_dump = dadump, + .d_ioctl = daioctl }; static struct extend_array *daperiphs; MALLOC_DEFINE(M_SCSIDA, "scsi_da", "scsi_da buffers"); +static int +daioctl(struct dev_ioctl_args *ap) +{ + int unit; + int error = 0; + struct buf *bp; + struct cam_periph *periph; + int byte_count; + struct da_softc * softc; + + off_t *del_num = (off_t*)ap->a_data; + off_t bytes_left; + off_t bytes_start; + + cdev_t dev = ap->a_head.a_dev; + + + unit = dkunit(dev); + 
periph = cam_extend_get(daperiphs, unit); + if (periph == NULL) + return(ENXIO); + softc = (struct da_softc *)periph->softc; + + switch (ap->a_cmd) { + case IOCTLTRIM: + { + + bytes_left = del_num[1]; + bytes_start = del_num[0]; + + /* TRIM occurs on 512-byte sectors. */ + KKASSERT((bytes_left % 512) == 0); + KKASSERT((bytes_start% 512) == 0); + + + /* Break TRIM up into int-sized commands because of b_bcount */ + while(bytes_left) { + + /* + * Rather than than squezing out more blocks in b_bcount + * and having to break up the TRIM request in da_start(), + * we ensure we can always TRIM this many bytes with one + * TRIM command (this happens if the device only + * supports one TRIM block). + * + * With min TRIM blksize of 1, TRIM command free + * 4194240 blks(64*65535): each LBA range can address + * 65535 blks and there 64 such ranges in a 512-byte + * block. And, 4194240 * 512 = 0x7FFF8000 + * + */ + byte_count = MIN(bytes_left,0x7FFF8000); + bp = getnewbuf(0,0,0,1); + + bp->b_cmd = BUF_CMD_FREEBLKS; + bp->b_bio1.bio_offset = bytes_start; + bp->b_bcount = byte_count; + bp->b_bio1.bio_flags |= BIO_SYNC; + bp->b_bio1.bio_done = biodone_sync; + + dev_dstrategy(ap->a_head.a_dev, &bp->b_bio1); + + if (biowait(&bp->b_bio1, "TRIM")) { + kprintf("Error:%d\n", bp->b_error); + return(bp->b_error ? bp->b_error : EIO); + } + brelse(bp); + bytes_left -= byte_count; + bytes_start += byte_count; + } + break; + } + default: + return(EINVAL); + } + + return(error); +} + static int daopen(struct dev_open_args *ap) { @@ -643,6 +739,8 @@ dastrategy(struct dev_strategy_args *ap) */ if (bp->b_cmd == BUF_CMD_WRITE || bp->b_cmd == BUF_CMD_FLUSH) bioqdisksort(&softc->bio_queue_wr, bio); + else if (bp->b_cmd == BUF_CMD_FREEBLKS) + bioqdisksort(&softc->bio_queue_trim, bio); else bioqdisksort(&softc->bio_queue_rd, bio); @@ -819,6 +917,7 @@ daoninvalidate(struct cam_periph *periph) * XXX Handle any transactions queued to the card * with XPT_ABORT_CCB. 
*/ + daflushbioq(&softc->bio_queue_trim, ENXIO); daflushbioq(&softc->bio_queue_wr, ENXIO); daflushbioq(&softc->bio_queue_rd, ENXIO); xpt_print(periph->path, "lost device\n"); @@ -978,6 +1077,18 @@ dasysctlinit(void *context, int pending) &softc->minimum_cmd_size, 0, dacmdsizesysctl, "I", "Minimum CDB size"); + /* Only create the option if the device supports TRIM */ + if (softc->disk.d_info.d_trimflag) { + SYSCTL_ADD_INT(&softc->sysctl_ctx, + SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, + "trim_enabled", + CTLFLAG_RW, + &softc->trim_enabled, + 0, + "Enable TRIM for this device (SSD))"); + } + cam_periph_release(periph); rel_mplock(); } @@ -1037,6 +1148,7 @@ daregister(struct cam_periph *periph, void *arg) softc = kmalloc(sizeof(*softc), M_DEVBUF, M_INTWAIT | M_ZERO); LIST_INIT(&softc->pending_ccbs); softc->state = DA_STATE_PROBE; + bioq_init(&softc->bio_queue_trim); bioq_init(&softc->bio_queue_rd); bioq_init(&softc->bio_queue_wr); if (SID_IS_REMOVABLE(&cgd->inq_data)) @@ -1044,6 +1156,17 @@ daregister(struct cam_periph *periph, void *arg) if ((cgd->inq_data.flags & SID_CmdQue) != 0) softc->flags |= DA_FLAG_TAGGED_QUEUING; + /* Used to get TRIM status from AHCI driver */ + if (cgd->inq_data.vendor_specific1[0] == 1) { + /* + * max number of lba ranges an SSD can handle in a single + * TRIM command. vendor_specific1[1] is the num of 512-byte + * blocks the SSD reports that can be passed in a TRIM cmd. 
+ */ + softc->trim_max_ranges = + min(cgd->inq_data.vendor_specific1[1] * 64, TRIM_MAX_RANGES); + } + periph->softc = softc; cam_extend_set(daperiphs, periph->unit_number, periph); @@ -1159,6 +1282,8 @@ daregister(struct cam_periph *periph, void *arg) (DA_DEFAULT_TIMEOUT * hz) / DA_ORDEREDTAG_INTERVAL, dasendorderedtag, softc); + + return(CAM_REQ_CMP); } @@ -1204,6 +1329,79 @@ dastart(struct cam_periph *periph, union ccb *start_ccb) break; } + /* Run the trim command if not already running */ + if (!softc->trim_running && + (bio = bioq_first(&softc->bio_queue_trim)) != 0) { + struct trim_request *req = &softc->trim_req; + struct bio *bio1; + int bps = 0, ranges = 0; + + softc->trim_running = 1; + bzero(req, sizeof(*req)); + bio1 = bio; + while (1) { + uint64_t lba; + int count; + + bp = bio1->bio_buf; + count = bp->b_bcount / softc->params.secsize; + lba = bio1->bio_offset/softc->params.secsize; + + kprintf("trim lba:%llu boff:%llu count:%d\n", + (unsigned long long) lba, + (unsigned long long) bio1->bio_offset, + count); + + bioq_remove(&softc->bio_queue_trim, bio1); + while (count > 0) { + int c = min(count, 0xffff); + int off = ranges * 8; + + req->data[off + 0] = lba & 0xff; + req->data[off + 1] = (lba >> 8) & 0xff; + req->data[off + 2] = (lba >> 16) & 0xff; + req->data[off + 3] = (lba >> 24) & 0xff; + req->data[off + 4] = (lba >> 32) & 0xff; + req->data[off + 5] = (lba >> 40) & 0xff; + req->data[off + 6] = c & 0xff; + req->data[off + 7] = (c >> 8) & 0xff; + lba += c; + count -= c; + ranges++; + } + + /* Try to merge multiple TRIM requests */ + req->bios[bps++] = bio1; + bio1 = bioq_first(&softc->bio_queue_trim); + if (bio1 == NULL || + bio1->bio_buf->b_bcount / softc->params.secsize > + (softc->trim_max_ranges - ranges) * 0xffff) + break; + } + + + cam_fill_csio(&start_ccb->csio, + 1/*retries*/, + dadone, + CAM_DIR_OUT, + MSG_SIMPLE_Q_TAG, + req->data, + ((ranges +63)/64)*512, + SSD_FULL_SIZE, + sizeof(struct scsi_rw_6), + da_default_timeout*2); + + 
start_ccb->ccb_h.ccb_state = DA_CCB_TRIM; + LIST_INSERT_HEAD(&softc->pending_ccbs, + &start_ccb->ccb_h, periph_links.le); + start_ccb->csio.ccb_h.func_code = XPT_TRIM; + start_ccb->ccb_h.ccb_bio = bio; + devstat_start_transaction(&softc->device_stats); + xpt_action(start_ccb); + xpt_schedule(periph, 1); + break; + } + /* * Select a read or write buffer to queue. Limit the number * of tags dedicated to reading or writing, giving reads @@ -1314,6 +1512,11 @@ dastart(struct cam_periph *periph, union ccb *start_ccb) ); } break; + case BUF_CMD_FREEBLKS: + if (softc->disk.d_info.d_trimflag & DA_FLAG_CAN_TRIM){ + start_ccb->csio.ccb_h.func_code = XPT_TRIM; + break; + } default: xpt_release_ccb(start_ccb); start_ccb = NULL; @@ -1461,6 +1664,7 @@ dadone(struct cam_periph *periph, union ccb *done_ccb) csio = &done_ccb->csio; switch (csio->ccb_h.ccb_state & DA_CCB_TYPE_MASK) { case DA_CCB_BUFFER_IO: + case DA_CCB_TRIM: { struct buf *bp; struct bio *bio; @@ -1555,7 +1759,28 @@ dadone(struct cam_periph *periph, union ccb *done_ccb) } devstat_end_transaction_buf(&softc->device_stats, bp); - biodone(bio); + if ((csio->ccb_h.ccb_state & DA_CCB_TYPE_MASK) == + DA_CCB_TRIM) { + struct trim_request *req = + (struct trim_request *) csio->data_ptr; + int i; + + for (i = 1; i < softc->trim_max_ranges && + req->bios[i]; i++) { + struct bio *bp1 = req->bios[i]; + + bp1->bio_buf->b_resid = bp->b_resid; + bp1->bio_buf->b_error = bp->b_error; + if (bp->b_flags & B_ERROR) + bp1->bio_buf->b_flags |= B_ERROR; + biodone(bp1); + } + softc->trim_running = 0; + biodone(bio); + xpt_schedule(periph,1); + } else + biodone(bio); + if (mustsched) xpt_schedule(periph, /*priority*/1); @@ -1618,6 +1843,7 @@ dadone(struct cam_periph *periph, union ccb *done_ccb) (uintmax_t)dp->sectors, dp->secsize, dp->heads, dp->secs_per_track, dp->cylinders); + CAM_SIM_UNLOCK(periph->sim); info.d_media_blksize = softc->params.secsize; info.d_media_blocks = softc->params.sectors; @@ -1737,6 +1963,13 @@ dadone(struct 
cam_periph *periph, union ccb *done_ccb) taskqueue_enqueue(taskqueue_thread[mycpuid], &softc->sysctl_task); } + + if (softc->trim_max_ranges) { + softc->disk.d_info.d_trimflag |= DA_FLAG_CAN_TRIM; + kprintf("%s%d: supports TRIM\n", + periph->periph_name, + periph->unit_number); + } softc->state = DA_STATE_NORMAL; /* * Since our peripheral may be invalidated by an error diff --git a/sys/dev/disk/ahci/ahci_cam.c b/sys/dev/disk/ahci/ahci_cam.c index 8db775dea7..8e61fbf4f7 100644 --- a/sys/dev/disk/ahci/ahci_cam.c +++ b/sys/dev/disk/ahci/ahci_cam.c @@ -958,6 +958,17 @@ ahci_xpt_action(struct cam_sim *sim, union ccb *ccb) break; } break; + case XPT_TRIM: + { + scsi_cdb_t cdb; + struct ccb_scsiio *csio; + csio = &ccb->csio; + cdb = (void *)((ccbh->flags & CAM_CDB_POINTER) ? + csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes); + cdb->generic.opcode = TRIM; + ahci_xpt_scsi_disk_io(ap, atx, ccb); + break; + } default: ccbh->status = CAM_REQ_INVALID; xpt_done(ccb); @@ -1067,6 +1078,17 @@ ahci_xpt_scsi_disk_io(struct ahci_port *ap, struct ata_port *atx, sizeof(rdata->inquiry_data.revision)); ccbh->status = CAM_REQ_CMP; } + + /* + * Use the vendor specific area to set the TRIM status + * for scsi_da + */ + if (at->at_identify.support_dsm) { + rdata->inquiry_data.vendor_specific1[0] = + at->at_identify.support_dsm &ATA_SUPPORT_DSM_TRIM; + rdata->inquiry_data.vendor_specific1[1] = + at->at_identify.max_dsm_blocks; + } break; case READ_CAPACITY_16: if (cdb->read_capacity_16.service_action != SRC16_SERVICE_ACTION) { @@ -1119,6 +1141,36 @@ ahci_xpt_scsi_disk_io(struct ahci_port *ap, struct ata_port *atx, xa->flags = 0; xa->complete = ahci_ata_complete_disk_synchronize_cache; break; + case TRIM: + fis = xa->fis; + fis->command = ATA_C_DATA_SET_MANAGEMENT; + fis->features = (u_int8_t)ATA_SF_DSM_TRIM; + fis->features_exp = (u_int8_t)(ATA_SF_DSM_TRIM>> 8); + + xa->flags = ATA_F_WRITE; + fis->flags = ATA_H2D_FLAGS_CMD; + + xa->data = csio->data_ptr; + xa->datalen = csio->dxfer_len; + 
xa->timeout = ccbh->timeout*50; /* milliseconds */ + + fis->sector_count =(u_int8_t)(xa->datalen/512); + fis->sector_count_exp =(u_int8_t)((xa->datalen/512)>>8); + + lba = 0; + fis->lba_low = (u_int8_t)lba; + fis->lba_mid = (u_int8_t)(lba >> 8); + fis->lba_high = (u_int8_t)(lba >> 16); + fis->lba_low_exp = (u_int8_t)(lba >> 24); + fis->lba_mid_exp = (u_int8_t)(lba >> 32); + fis->lba_high_exp = (u_int8_t)(lba >> 40); + + fis->device = ATA_H2D_DEVICE_LBA; + xa->data = csio->data_ptr; + + xa->complete = ahci_ata_complete_disk_rw; + ccbh->status = CAM_REQ_INPROG; + break; case TEST_UNIT_READY: case START_STOP_UNIT: case PREVENT_ALLOW: diff --git a/sys/dev/disk/ahci/atascsi.h b/sys/dev/disk/ahci/atascsi.h index d23a8fc1b9..1157401ec4 100644 --- a/sys/dev/disk/ahci/atascsi.h +++ b/sys/dev/disk/ahci/atascsi.h @@ -23,6 +23,7 @@ struct scsi_link; * ATA commands */ +#define ATA_C_DATA_SET_MANAGEMENT 0x06 /* Data Set Management command */ #define ATA_C_SATA_FEATURE_ENA 0x10 #define ATA_C_READDMA_EXT 0x25 #define ATA_C_READ_LOG_EXT 0x2f @@ -54,6 +55,7 @@ struct scsi_link; /* * ATA SET FEATURES subcommands */ +#define ATA_SF_DSM_TRIM 0x01 /* TRIM DSM feature */ #define ATA_SF_WRITECACHE_EN 0x02 #define ATA_SF_SETXFER 0x03 #define ATA_SF_LOOKAHEAD_EN 0xaa @@ -93,7 +95,10 @@ struct ata_identify { u_int16_t recmwdma; /* 66 */ u_int16_t minpio; /* 67 */ u_int16_t minpioflow; /* 68 */ - u_int16_t reserved4[2]; /* 69 */ + u_int16_t support3; /* 69 */ +#define ATA_SUPPORT_RZAT 0x0020 +#define ATA_SUPPORT_DRAT 0x4000 + u_int16_t reserved4; /* 70 */ u_int16_t typtime[2]; /* 71 */ u_int16_t reserved5[2]; /* 73 */ u_int16_t qdepth; /* 75 */ @@ -123,7 +128,7 @@ struct ata_identify { u_int16_t streamperf[2]; /* 98 */ u_int16_t addrsecxt[4]; /* 100 */ u_int16_t stream_xfer_p; /* 104 */ - u_int16_t padding1; /* 105 */ + u_int16_t max_dsm_blocks; /* 105 */ u_int16_t phys_sect_sz; /* 106 */ u_int16_t seek_delay; /* 107 */ u_int16_t naa_ieee_oui; /* 108 */ @@ -141,7 +146,10 @@ struct 
ata_identify { #define ATA_SECURE_LOCKED (1<<2) #define ATA_SECURE_FROZEN (1<<3) u_int16_t vendor[31]; /* 129 */ - u_int16_t padding3[16]; /* 160 */ + u_int16_t padding3[9]; /* 160 */ + u_int16_t support_dsm; /* 169 */ +#define ATA_SUPPORT_DSM_TRIM 0x0001 + u_int16_t padding5[6]; /* 170 */ u_int16_t curmedser[30]; /* 176 */ u_int16_t sctsupport; /* 206 */ u_int16_t padding4[48]; /* 207 */ diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 8818f194ed..5a5d8f5616 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1995,7 +1995,7 @@ vfs_bio_awrite(struct buf *bp) * * MPALMOSTSAFE */ -static struct buf * +struct buf * getnewbuf(int blkflags, int slptimeo, int size, int maxsize) { struct buf *bp; diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index efe3bcd580..c8e3a740fc 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -356,11 +356,11 @@ update: mp->mnt_kern_flag |= MNTK_WANTRDWR; mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | - MNT_NOSYMFOLLOW | MNT_IGNORE | + MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | - MNT_NOSYMFOLLOW | MNT_IGNORE | + MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); /* * Mount the filesystem. 
diff --git a/sys/sys/buf.h b/sys/sys/buf.h index ff28772684..71877b32bd 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -426,6 +426,7 @@ struct buf *findblk (struct vnode *, off_t, int); struct buf *getblk (struct vnode *, off_t, int, int, int); struct buf *getcacheblk (struct vnode *, off_t, int); struct buf *geteblk (int); +struct buf *getnewbuf(int, int, int, int); void bqhold(struct buf *bp); void bqdrop(struct buf *bp); void regetblk(struct buf *bp); diff --git a/sys/sys/disk.h b/sys/sys/disk.h index 6a3a6da11d..78887e7ca3 100644 --- a/sys/sys/disk.h +++ b/sys/sys/disk.h @@ -92,6 +92,7 @@ struct disk_info { u_int d_ncylinders; u_int d_secpertrack; u_int d_secpercyl; + u_int d_trimflag; char *d_serialno; }; diff --git a/sys/sys/ioctl_compat.h b/sys/sys/ioctl_compat.h index 7ed17fa45d..ea990569d3 100644 --- a/sys/sys/ioctl_compat.h +++ b/sys/sys/ioctl_compat.h @@ -166,5 +166,6 @@ struct sgttyb { #define OTTYDISC 0 #define NETLDISC 1 #define NTTYDISC 2 +#define IOCTLTRIM _IOW('t', 128, off_t[2]) #endif /* !_SYS_IOCTL_COMPAT_H_ */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 3b5f7a2ada..495314fa35 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -225,6 +225,7 @@ struct mount { #define MNT_SUIDDIR 0x00100000 /* special handling of SUID on dirs */ #define MNT_SOFTDEP 0x00200000 /* soft updates being done */ #define MNT_NOSYMFOLLOW 0x00400000 /* do not follow symlinks */ +#define MNT_TRIM 0x01000000 /* Enable online FS trimming */ #define MNT_NOATIME 0x10000000 /* disable update of file access time */ #define MNT_NOCLUSTERR 0x40000000 /* disable cluster read */ #define MNT_NOCLUSTERW 0x80000000 /* disable cluster write */ @@ -262,7 +263,7 @@ struct mount { MNT_LOCAL | MNT_USER | MNT_QUOTA | \ MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \ - MNT_IGNORE | MNT_NOSYMFOLLOW | MNT_EXPUBLIC ) + MNT_IGNORE | MNT_NOSYMFOLLOW | MNT_EXPUBLIC | MNT_TRIM) /* * External filesystem command modifier flags. 
* Unmount can use the MNT_FORCE flag. diff --git a/sys/vfs/ufs/ffs_alloc.c b/sys/vfs/ufs/ffs_alloc.c index 7743bc740d..7ee59c4a9f 100644 --- a/sys/vfs/ufs/ffs_alloc.c +++ b/sys/vfs/ufs/ffs_alloc.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #include #include +#include #include #include "quota.h" @@ -64,6 +66,8 @@ typedef ufs_daddr_t allocfcn_t (struct inode *ip, int cg, ufs_daddr_t bpref, static ufs_daddr_t ffs_alloccg (struct inode *, int, ufs_daddr_t, int); static ufs_daddr_t ffs_alloccgblk (struct inode *, struct buf *, ufs_daddr_t); +static void ffs_blkfree_cg(struct fs *, struct vnode *, cdev_t , ino_t, + uint32_t , ufs_daddr_t, long ); #ifdef DIAGNOSTIC static int ffs_checkblk (struct inode *, ufs_daddr_t, long); #endif @@ -1475,36 +1479,35 @@ gotit: * block reassembly is checked. */ void -ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size) +ffs_blkfree_cg(struct fs * fs, struct vnode * i_devvp, cdev_t i_dev, ino_t i_number, + uint32_t i_din_uid, ufs_daddr_t bno, long size) { - struct fs *fs; struct cg *cgp; struct buf *bp; ufs_daddr_t blkno; int i, error, cg, blk, frags, bbase; uint8_t *blksfree; - fs = ip->i_fs; - VOP_FREEBLKS(ip->i_devvp, fsbtodoff(fs, bno), size); + VOP_FREEBLKS(i_devvp, fsbtodoff(fs, bno), size); if ((uint)size > fs->fs_bsize || fragoff(fs, size) != 0 || fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { kprintf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n", - devtoname(ip->i_dev), (long)bno, (long)fs->fs_bsize, size, + devtoname(i_dev), (long)bno, (long)fs->fs_bsize, size, fs->fs_fsmnt); panic("ffs_blkfree: bad size"); } cg = dtog(fs, bno); if ((uint)bno >= fs->fs_size) { kprintf("bad block %ld, ino %lu\n", - (long)bno, (u_long)ip->i_number); - ffs_fserr(fs, ip->i_uid, "bad block"); + (long)bno, (u_long)i_number); + ffs_fserr(fs, i_din_uid, "bad block"); return; } /* * Load the cylinder group */ - error = bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)), + error 
= bread(i_devvp, fsbtodoff(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, &bp); if (error) { brelse(bp); @@ -1526,7 +1529,7 @@ ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size) blkno = fragstoblks(fs, bno); if (!ffs_isfreeblock(fs, blksfree, blkno)) { kprintf("dev = %s, block = %ld, fs = %s\n", - devtoname(ip->i_dev), (long)bno, fs->fs_fsmnt); + devtoname(i_dev), (long)bno, fs->fs_fsmnt); panic("ffs_blkfree: freeing free block"); } ffs_setblock(fs, blksfree, blkno); @@ -1564,7 +1567,7 @@ ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size) for (i = 0; i < frags; i++) { if (isset(blksfree, bno + i)) { kprintf("dev = %s, block = %ld, fs = %s\n", - devtoname(ip->i_dev), (long)(bno + i), + devtoname(i_dev), (long)(bno + i), fs->fs_fsmnt); panic("ffs_blkfree: freeing free frag"); } @@ -1601,6 +1604,90 @@ ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size) bdwrite(bp); } +struct ffs_blkfree_trim_params { + struct task task; + ufs_daddr_t bno; + long size; + + /* + * With TRIM, inode pointer is gone in the callback but we still need + * the following fields for ffs_blkfree_cg() + */ + struct vnode *i_devvp; + struct fs *i_fs; + cdev_t i_dev; + ino_t i_number; + uint32_t i_din_uid; +}; + + +static void +ffs_blkfree_trim_task(void *ctx, int pending) +{ + struct ffs_blkfree_trim_params *tp; + + tp = ctx; + ffs_blkfree_cg(tp->i_fs, tp->i_devvp, tp->i_dev, tp->i_number, + tp->i_din_uid, tp->bno, tp->size); + kfree(tp, M_TEMP); +} + + + +static void +ffs_blkfree_trim_completed(struct bio *biop) +{ + struct buf *bp = biop->bio_buf; + struct ffs_blkfree_trim_params *tp; + + tp = bp->b_bio1.bio_caller_info1.ptr; + TASK_INIT(&tp->task, 0, ffs_blkfree_trim_task, tp); + tp = biop->bio_caller_info1.ptr; + taskqueue_enqueue(taskqueue_swi, &tp->task); + biodone(biop); +} + + +/* + * If TRIM is enabled, we TRIM the blocks first then free them. We do this + * after TRIM is finished and the callback handler is called. 
The logic here + * is that we trim the blocks before updating the bitmap so that we don't + * reuse a block before we actually trim it, which would result in trimming + * a valid block. + */ +void +ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size) +{ + struct ufsmount *ump = VFSTOUFS(ITOV(ip)->v_mount); + struct ffs_blkfree_trim_params *tp; + + if (!(ump->um_mountp->mnt_flag & MNT_TRIM)) { + ffs_blkfree_cg(ip->i_fs, ip->i_devvp,ip->i_dev,ip->i_number, + ip->i_uid, bno, size); + return; + } + + struct buf *bp; + + tp = kmalloc(sizeof(struct ffs_blkfree_trim_params), M_TEMP, M_WAITOK); + tp->bno = bno; + tp->i_fs= ip->i_fs; + tp->i_devvp = ip->i_devvp; + tp->i_dev = ip->i_dev; + tp->i_din_uid = ip->i_uid; + tp->i_number = ip->i_number; + tp->size = size; + + bp = getnewbuf(0,0,0,1); + BUF_KERNPROC(bp); + bp->b_cmd = BUF_CMD_FREEBLKS; + bp->b_bio1.bio_offset = fsbtodoff(ip->i_fs, bno); + bp->b_bcount = size; + bp->b_bio1.bio_caller_info1.ptr = tp; + bp->b_bio1.bio_done = ffs_blkfree_trim_completed; + vn_strategy(ip->i_devvp, &bp->b_bio1); +} + #ifdef DIAGNOSTIC /* * Verify allocation of a block or fragment. Returns true if block or diff --git a/sys/vfs/ufs/ffs_vfsops.c b/sys/vfs/ufs/ffs_vfsops.c index a47758a22c..e49a0ae4d5 100644 --- a/sys/vfs/ufs/ffs_vfsops.c +++ b/sys/vfs/ufs/ffs_vfsops.c @@ -37,6 +37,7 @@ #include "opt_quota.h" +#include #include #include #include -- 2.41.0