From 6503926dbdde0c21b7203cf9473d140d732b4654 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Mon, 30 Jul 2012 12:29:31 +0800 Subject: [PATCH 01/16] tools: Add toeplitz, which calculate toeplitz on given addresses/ports --- tools/tools/toeplitz/Makefile | 10 ++ tools/tools/toeplitz/toeplitz.c | 173 ++++++++++++++++++++++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 tools/tools/toeplitz/Makefile create mode 100644 tools/tools/toeplitz/toeplitz.c diff --git a/tools/tools/toeplitz/Makefile b/tools/tools/toeplitz/Makefile new file mode 100644 index 0000000000..efd1443b92 --- /dev/null +++ b/tools/tools/toeplitz/Makefile @@ -0,0 +1,10 @@ +# $DragonFly: src/tools/tools/netrate/pktgenctl/Makefile,v 1.2 2008/03/27 11:42:11 sephe Exp $ + +WARNS= 6 +PROG= toeplitz +BINDIR= /usr/sbin +NOMAN= + +CFLAGS+= -I${.CURDIR}/.. + +.include diff --git a/tools/tools/toeplitz/toeplitz.c b/tools/tools/toeplitz/toeplitz.c new file mode 100644 index 0000000000..73718b5b3d --- /dev/null +++ b/tools/tools/toeplitz/toeplitz.c @@ -0,0 +1,173 @@ +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#define KEYLEN 40 +#define HASHLEN 12 + +static uint8_t toeplitz_key[KEYLEN]; +static uint32_t hash_table[HASHLEN][256]; + +static void toeplitz_init(uint32_t[][256], int, const uint8_t[], int); +static void getaddrport(char *, uint32_t *, uint16_t *); + +static void +usage(const char *cmd) +{ + fprintf(stderr, "%s [-s s1_hex [-s s2_hex]] [-p] [-m mask]" + "addr1.port1 addr2.port2\n", cmd); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + uint32_t saddr, daddr; + uint16_t sport, dport; + uint32_t res, mask; + + const char *cmd = argv[0]; + uint8_t seeds[2] = { 0x6d, 0x5a }; + int i, opt, use_port; + + i = 0; + use_port = 0; + mask = 0xffffffff; + + while ((opt = getopt(argc, argv, "s:pm:")) != -1) { + switch (opt) { + case 's': + if (i >= 2) + usage(cmd); + seeds[i++] = strtoul(optarg, NULL, 16); + break; + + case 'p': + 
use_port = 1; + break; + + case 'm': + mask = strtoul(optarg, NULL, 16); + break; + + default: + usage(cmd); + } + } + argc -= optind; + argv += optind; + + if (argc != 2) + usage(cmd); + + for (i = 0; i < KEYLEN; ++i) { + if (i & 1) + toeplitz_key[i] = seeds[1]; + else + toeplitz_key[i] = seeds[0]; + } + + getaddrport(argv[0], &saddr, &sport); + getaddrport(argv[1], &daddr, &dport); + + toeplitz_init(hash_table, HASHLEN, toeplitz_key, KEYLEN); + + res = hash_table[0][(saddr >> 0) & 0xff]; + res ^= hash_table[1][(saddr >> 8) & 0xff]; + res ^= hash_table[2][(saddr >> 16) & 0xff]; + res ^= hash_table[3][(saddr >> 24) & 0xff]; + res ^= hash_table[4][(daddr >> 0) & 0xff]; + res ^= hash_table[5][(daddr >> 8) & 0xff]; + res ^= hash_table[6][(daddr >> 16) & 0xff]; + res ^= hash_table[7][(daddr >> 24) & 0xff]; + if (use_port) { + res ^= hash_table[8][(sport >> 0) & 0xff]; + res ^= hash_table[9][(sport >> 8) & 0xff]; + res ^= hash_table[10][(dport >> 0) & 0xff]; + res ^= hash_table[11][(dport >> 8) & 0xff]; + } + + printf("%#08x, masked %#8x\n", res, res & mask); + exit(0); +} + +static void +toeplitz_init(uint32_t cache[][256], int cache_len, + const uint8_t key_str[], int key_strlen) +{ + int i; + + if (key_strlen < cache_len + (int)sizeof(uint32_t)) + exit(1); + + for (i = 0; i < cache_len; ++i) { + uint32_t key[NBBY]; + int j, b, shift, val; + + bzero(key, sizeof(key)); + + /* + * Calculate 32bit keys for one byte; one key for each bit. + */ + for (b = 0; b < NBBY; ++b) { + for (j = 0; j < 32; ++j) { + uint8_t k; + int bit; + + bit = (i * NBBY) + b + j; + + k = key_str[bit / NBBY]; + shift = NBBY - (bit % NBBY) - 1; + if (k & (1 << shift)) + key[b] |= 1 << (31 - j); + } + } + + /* + * Cache the results of all possible bit combination of + * one byte. 
+ */ + for (val = 0; val < 256; ++val) { + uint32_t res = 0; + + for (b = 0; b < NBBY; ++b) { + shift = NBBY - b - 1; + if (val & (1 << shift)) + res ^= key[b]; + } + cache[i][val] = res; + } + } +} + +static void +getaddrport(char *ap_str, uint32_t *addr, uint16_t *port0) +{ + uint16_t port; + char *p; + + p = strrchr(ap_str, '.'); + if (p == NULL) { + fprintf(stderr, "invalid addr.port %s\n", ap_str); + exit(1); + } + + *p = '\0'; + ++p; + + port = strtoul(p, NULL, 10); + *port0 = htons(port); + + if (inet_pton(AF_INET, ap_str, addr) <= 0) { + fprintf(stderr, "invalid addr %s\n", ap_str); + exit(1); + } +} -- 2.41.0 From b3f7971467e0ce7934443aab946e11396d9f78e6 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Mon, 30 Jul 2012 14:36:52 +0800 Subject: [PATCH 02/16] tcp: Increase tcp_sosend_agglim from 2 to 3 This change makes better use of TSO. When 2 is used, most of the large TCP segments' size are 2*MSS or 3*MSS. When 3 is used, most of the large TCP segments' size are 4*MSS or 5*MSS. The math is quite simple, given 1448B segment size: 2 == 4096B == 2 segments 3 == 6144B == 4 segments Increasing it to a higher value improves single full speed stream's CPU utilization, but does not have much effect on multiple streams'. 
--- sys/netinet/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 81fb6fc08b..dd906b5ee1 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -249,7 +249,7 @@ int tcp_autorcvbuf_max = 2*1024*1024; SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW, &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer"); -int tcp_sosend_agglim = 2; +int tcp_sosend_agglim = 3; SYSCTL_INT(_net_inet_tcp, OID_AUTO, sosend_agglim, CTLFLAG_RW, &tcp_sosend_agglim, 0, "TCP sosend mbuf aggregation limit"); -- 2.41.0 From f95f4241646d758934cfd0f3134fcee57ceb8994 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fran=C3=A7ois=20Tigeot?= Date: Sun, 29 Jul 2012 18:23:31 +0200 Subject: [PATCH 03/16] kernel: remove O_ROOTCRED * This flag was DragonFly-specific and effectively unused * Removing it frees up flag space for interacting with the rest of the world * Partially fixes issue #2357 --- sys/kern/kern_fp.c | 11 ++++------- sys/sys/fcntl.h | 4 ---- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/sys/kern/kern_fp.c b/sys/kern/kern_fp.c index 710c62d316..13916fbe0e 100644 --- a/sys/kern/kern_fp.c +++ b/sys/kern/kern_fp.c @@ -85,8 +85,7 @@ typedef struct file *file_t; * * Open a file as specified. Use O_* flags for flags. * - * NOTE! O_ROOTCRED not quite working yet, vn_open() asserts that the - * cred must match the process's cred. XXX + * vn_open() asserts that the cred must match the process's cred. * * NOTE! when fp_open() is called from a pure thread, root creds are * used. 
@@ -103,10 +102,8 @@ fp_open(const char *path, int flags, int mode, file_t *fpp) return (error); fp = *fpp; td = curthread; - if (td->td_proc) { - if ((flags & O_ROOTCRED) == 0) - fsetcred(fp, td->td_proc->p_ucred); - } + if (td->td_proc) + fsetcred(fp, td->td_proc->p_ucred); error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_LOCKVP); flags = FFLAGS(flags); if (error == 0) @@ -177,7 +174,7 @@ fp_vpopen(struct vnode *vp, int flags, file_t *fpp) if ((error = falloc(NULL, fpp, NULL)) != 0) goto bad2; fp = *fpp; - if ((flags & O_ROOTCRED) == 0 && td->td_proc) + if (td->td_proc) fsetcred(fp, td->td_proc->p_ucred); error = VOP_OPEN(vp, flags, td->td_proc->p_ucred, fp); diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 353ab9b643..2733b37804 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -97,10 +97,6 @@ /* Attempt to bypass the buffer cache */ #define O_DIRECT 0x00010000 -#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) -#define O_ROOTCRED 0x00020000 /* fp_open */ -#endif - #define O_FBLOCKING 0x00040000 /* force blocking I/O */ #define O_FNONBLOCKING 0x00080000 /* force non-blocking I/O */ #define O_FAPPEND 0x00100000 /* force append mode for write */ -- 2.41.0 From 05dd1c0b78aefd17c57d5dcb882eba6fe948ea4d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fran=C3=A7ois=20Tigeot?= Date: Sun, 29 Jul 2012 19:26:02 +0200 Subject: [PATCH 04/16] kernel: remove O_FRNONBLOCKING * Like O_ROOTCRED it was effectively unused and removing it frees up a precious bit among the various O_xxx flags * Partially fixes issue #2357 --- sys/kern/vfs_vnops.c | 4 +--- sys/sys/fcntl.h | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index ec8fabafdd..9b28f6611f 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -647,9 +647,7 @@ vn_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags) vp = (struct vnode *)fp->f_data; ioflag = 0; - if (flags & O_FRNONBLOCKING) { - ioflag |= (IO_NDELAY | IO_NRDELAY); - } 
else if (flags & O_FBLOCKING) { + if (flags & O_FBLOCKING) { /* ioflag &= ~IO_NDELAY; */ } else if (flags & O_FNONBLOCKING) { ioflag |= IO_NDELAY; diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 2733b37804..8566bb60ea 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -106,7 +106,6 @@ #define O_FUNBUFFERED 0x01000000 /* force unbuffered (direct) I/O */ #define O_FBUFFERED 0x02000000 /* force buffered I/O */ #define O_MAPONREAD 0x04000000 /* memory map read buffer */ -#define O_FRNONBLOCKING 0x08000000 /* nonblocking I/O no disk wait */ #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) #define FREVOKED 0x10000000 /* revoked by fdrevoke() */ @@ -117,7 +116,7 @@ #define O_FMASK (O_FBLOCKING|O_FNONBLOCKING|O_FAPPEND|O_FOFFSET|\ O_FSYNCWRITE|O_FASYNCWRITE|O_FUNBUFFERED|O_FBUFFERED|\ - O_MAPONREAD|O_FRNONBLOCKING) + O_MAPONREAD) #ifdef _KERNEL /* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */ -- 2.41.0 From 28d748b9da52d38a8f9b903e9dde1480effdf8b7 Mon Sep 17 00:00:00 2001 From: Alex Hornung Date: Mon, 30 Jul 2012 11:20:00 +0000 Subject: [PATCH 05/16] open - implement O_DIRECTORY --- sys/kern/vfs_vnops.c | 4 ++++ sys/sys/fcntl.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 9b28f6611f..90ae370e67 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -208,6 +208,10 @@ again: error = EOPNOTSUPP; goto bad; } + if (vp->v_type != VDIR && (fmode & O_DIRECTORY)) { + error = ENOTDIR; + goto bad; + } if ((fmode & O_CREAT) == 0) { if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) { diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 8566bb60ea..489f0b625f 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -107,6 +107,8 @@ #define O_FBUFFERED 0x02000000 /* force buffered I/O */ #define O_MAPONREAD 0x04000000 /* memory map read buffer */ +#define O_DIRECTORY 0x08000000 /* error if not a directory */ + #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) #define 
FREVOKED 0x10000000 /* revoked by fdrevoke() */ #define FAPPENDONLY 0x20000000 /* O_APPEND cannot be changed */ -- 2.41.0 From f3b0543ffd87cda1826acc97ac6cfc7696e6e9b0 Mon Sep 17 00:00:00 2001 From: Alex Hornung Date: Mon, 30 Jul 2012 14:02:26 +0000 Subject: [PATCH 06/16] open.2 - document O_DIRECTORY --- lib/libc/sys/open.2 | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2 index 2e0c634e71..63eb916d95 100644 --- a/lib/libc/sys/open.2 +++ b/lib/libc/sys/open.2 @@ -33,7 +33,7 @@ .\" $FreeBSD: src/lib/libc/sys/open.2,v 1.11.2.9 2001/12/14 18:34:01 ru Exp $ .\" $DragonFly: src/lib/libc/sys/open.2,v 1.3 2005/07/29 23:16:04 hsu Exp $ .\" -.Dd July 24, 2009 +.Dd July 31, 2012 .Dt OPEN 2 .Os .Sh NAME @@ -119,6 +119,7 @@ O_EXLOCK atomically obtain an exclusive lock O_DIRECT eliminate or reduce cache effects O_FSYNC synchronous writes O_NOFOLLOW do not follow symlinks +O_DIRECTORY error if file is not a directory .Ed .Pp Opening a file with @@ -193,6 +194,13 @@ If it cannot avoid caching the data, it will minimize the impact the data has on the cache. Use of this flag can drastically reduce performance if not used with care. .Pp +.Dv O_DIRECTORY +may be used to ensure the resulting file descriptor refers to a directory. +This flag can be used to prevent applications with elevated privileges +from opening files which are even unsafe to open with +.Dv O_RDONLY , +such as device nodes. +.Pp If successful, .Fn open and @@ -235,7 +243,9 @@ argument is not an absolute path and the .Fa fd argument is neither .Dv AT_FDCWD -nor a file descriptor associated with a directory. +nor a file descriptor associated with a directory or +.Dv O_DIRECTORY +is specified and the file is not a directory. .It Bq Er ENAMETOOLONG A component of a pathname exceeded 255 characters, or an entire path name exceeded 1023 characters. 
-- 2.41.0 From 77e32d4c0406bbc862bec9754cfcde0910c2bfd8 Mon Sep 17 00:00:00 2001 From: Alex Hornung Date: Mon, 30 Jul 2012 11:21:53 +0000 Subject: [PATCH 07/16] implement linkat --- include/unistd.h | 2 +- sys/kern/init_sysent.c | 1 + sys/kern/syscalls.c | 1 + sys/kern/syscalls.master | 2 ++ sys/kern/vfs_syscalls.c | 27 +++++++++++++++++++++++++++ sys/sys/fcntl.h | 1 + sys/sys/syscall-hide.h | 1 + sys/sys/syscall.h | 3 ++- sys/sys/syscall.mk | 3 ++- sys/sys/sysproto.h | 11 +++++++++++ sys/sys/sysunion.h | 1 + 11 files changed, 50 insertions(+), 3 deletions(-) diff --git a/include/unistd.h b/include/unistd.h index b9efb522eb..b8dbf463df 100644 --- a/include/unistd.h +++ b/include/unistd.h @@ -425,8 +425,8 @@ int faccessat(int, const char *, int, int); int fchownat(int, const char *, uid_t, gid_t, int); #if 0 int fexecve(int, char *const [], char *const []); -int linkat(int, const char *, int, const char *, int); #endif +int linkat(int, const char *, int, const char *, int); ssize_t readlinkat(int, const char * __restrict, char * __restrict, size_t); int symlinkat(const char *, int, const char *); int unlinkat(int, const char *, int); diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index 3c0762ba3c..b96207f8fc 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -564,4 +564,5 @@ struct sysent sysent[] = { { AS(symlinkat_args), (sy_call_t *)sys_symlinkat }, /* 528 = symlinkat */ { AS(swapoff_args), (sy_call_t *)sys_swapoff }, /* 529 = swapoff */ { AS(vquotactl_args), (sy_call_t *)sys_vquotactl }, /* 530 = vquotactl */ + { AS(linkat_args), (sy_call_t *)sys_linkat }, /* 531 = linkat */ }; diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index 0c8e3f0d76..013a338f64 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -538,4 +538,5 @@ const char *syscallnames[] = { "symlinkat", /* 528 = symlinkat */ "swapoff", /* 529 = swapoff */ "vquotactl", /* 530 = vquotactl */ + "linkat", /* 531 = linkat */ }; diff --git 
a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 843f90e721..8a5a793fed 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -745,3 +745,5 @@ 529 STD BSD { int swapoff(char *name); } 530 STD BSD { int vquotactl(const char *path, \ struct plistref *pref); } +531 STD POSIX { int linkat(int fd1, char *path1, int fd2, \ + char *path2, int flags); } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 11c66a80c7..4e9fe492aa 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -2287,6 +2287,33 @@ sys_link(struct link_args *uap) return (error); } +/* + * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) + * + * Make a hard file link. The path1 argument is relative to the directory + * associated with fd1, and similarly the path2 argument is relative to + * the directory associated with fd2. + */ +int +sys_linkat(struct linkat_args *uap) +{ + struct nlookupdata nd, linknd; + struct file *fp1, *fp2; + int error; + + error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, + (uap->flags & AT_SYMLINK_FOLLOW) ? 
NLC_FOLLOW : 0); + if (error == 0) { + error = nlookup_init_at(&linknd, &fp2, uap->fd2, + uap->path2, UIO_USERSPACE, 0); + if (error == 0) + error = kern_link(&nd, &linknd); + nlookup_done_at(&linknd, fp2); + } + nlookup_done_at(&nd, fp1); + return (error); +} + int kern_symlink(struct nlookupdata *nd, char *path, int mode) { diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 489f0b625f..03e657b777 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -170,6 +170,7 @@ #define AT_SYMLINK_NOFOLLOW 1 #define AT_REMOVEDIR 2 #define AT_EACCESS 4 +#define AT_SYMLINK_FOLLOW 8 /* * Constants used for fcntl(2) diff --git a/sys/sys/syscall-hide.h b/sys/sys/syscall-hide.h index 8d1e08689e..e0310e1464 100644 --- a/sys/sys/syscall-hide.h +++ b/sys/sys/syscall-hide.h @@ -361,3 +361,4 @@ HIDE_POSIX(readlinkat) HIDE_POSIX(symlinkat) HIDE_BSD(swapoff) HIDE_BSD(vquotactl) +HIDE_POSIX(linkat) diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index a97cb28504..ed049b483a 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -370,4 +370,5 @@ #define SYS_symlinkat 528 #define SYS_swapoff 529 #define SYS_vquotactl 530 -#define SYS_MAXSYSCALL 531 +#define SYS_linkat 531 +#define SYS_MAXSYSCALL 532 diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk index 5a8f8d8162..10a711d404 100644 --- a/sys/sys/syscall.mk +++ b/sys/sys/syscall.mk @@ -309,4 +309,5 @@ MIASM = \ readlinkat.o \ symlinkat.o \ swapoff.o \ - vquotactl.o + vquotactl.o \ + linkat.o diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index e02fa51188..d3344697f5 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -2365,6 +2365,16 @@ struct vquotactl_args { const char * path; char path_[PAD_(const char *)]; struct plistref * pref; char pref_[PAD_(struct plistref *)]; }; +struct linkat_args { +#ifdef _KERNEL + struct sysmsg sysmsg; +#endif + int fd1; char fd1_[PAD_(int)]; + char * path1; char path1_[PAD_(char *)]; + int fd2; char fd2_[PAD_(int)]; + char * path2; char path2_[PAD_(char *)]; + int flags; char 
flags_[PAD_(int)]; +}; #ifdef COMPAT_43 @@ -2982,6 +2992,7 @@ int sys_readlinkat (struct readlinkat_args *); int sys_symlinkat (struct symlinkat_args *); int sys_swapoff (struct swapoff_args *); int sys_vquotactl (struct vquotactl_args *); +int sys_linkat (struct linkat_args *); #endif /* !_SYS_SYSPROTO_H_ */ #undef PAD_ diff --git a/sys/sys/sysunion.h b/sys/sys/sysunion.h index 6006e90511..b18ba480de 100644 --- a/sys/sys/sysunion.h +++ b/sys/sys/sysunion.h @@ -417,4 +417,5 @@ union sysunion { struct symlinkat_args symlinkat; struct swapoff_args swapoff; struct vquotactl_args vquotactl; + struct linkat_args linkat; }; -- 2.41.0 From 34e96d8ccd8e2301e0cd27f94d16d9bc0f8253fb Mon Sep 17 00:00:00 2001 From: Alex Hornung Date: Mon, 30 Jul 2012 11:37:09 +0000 Subject: [PATCH 08/16] linkat(2) man page --- lib/libc/sys/Makefile.inc | 1 + lib/libc/sys/link.2 | 79 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index ef1ddf5258..bd2e7ad4be 100644 --- a/lib/libc/sys/Makefile.inc +++ b/lib/libc/sys/Makefile.inc @@ -132,6 +132,7 @@ MLINKS+=intro.2 errno.2 MLINKS+=ioprio_get.2 ioprio_set.2 MLINKS+=kqueue.2 EV_SET.2 \ kqueue.2 kevent.2 +MLINKS+=link.2 linkat.2 MLINKS+=lseek.2 seek.2 MLINKS+=madvise.2 mcontrol.2 \ madvise.2 posix_madvise.3 diff --git a/lib/libc/sys/link.2 b/lib/libc/sys/link.2 index 8107f25d34..e27acb8c74 100644 --- a/lib/libc/sys/link.2 +++ b/lib/libc/sys/link.2 @@ -33,7 +33,7 @@ .\" $FreeBSD: src/lib/libc/sys/link.2,v 1.11.2.7 2001/12/14 18:34:01 ru Exp $ .\" $DragonFly: src/lib/libc/sys/link.2,v 1.4 2007/04/26 17:35:03 swildner Exp $ .\" -.Dd March 5, 1999 +.Dd July 31, 2012 .Dt LINK 2 .Os .Sh NAME @@ -45,6 +45,8 @@ .In unistd.h .Ft int .Fn link "const char *name1" "const char *name2" +.Ft int +.Fn linkat "int fd1" "const char *name1" "int fd2" "const char *name2" "int flags" .Sh DESCRIPTION The .Fn link @@ -80,6 +82,45 @@ and must be in the same file 
system. .Fa name1 may not be a directory. +.Pp +When operating on a symlink, +.Fn link +resolves the symlink and creates a hard link on the target. +.Fn linkat +will do the same if +.Dv AT_SYMLINK_FOLLOW +is set in +.Fa flags , +but it will link on the symlink itself if the flag is clear. +.Pp +The +.Fn linkat +system call is equivalent to +.Fa link +except in the case where either +.Fa name1 +or +.Fa name2 +or both are relative paths. +In this case a relative path +.Fa name1 +is interpreted relative to the directory associated with the file descriptor +.Fa fd1 +instead of the current working directory and similarly for +.Fa name2 +and the file descriptor +.Fa fd2 . +If +.Fa fd1 +or +.Fa fd2 +has the special value +.Dv AT_FDCWD +then +.Fa name1 +or +.Fa name2 +respectively is interpreted relative to the current working directory. .Sh RETURN VALUES .Rv -std link .Sh ERRORS @@ -148,6 +189,38 @@ system. One of the pathnames specified is outside the process's allocated address space. .El +.Pp +In addition to the errors returned by the +.Fn link +syscall, the +.Fn linkat +system call may fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa name1 +or +.Fa name2 +argument does not specify an absolute path and the +.Fa fd1 +or +.Fa fd2 +argument, respectively, is neither +.Dv AT_FDCWD +nor a valid file descriptor open for searching. +.It Bq Er ENOTDIR +The +.Fa name1 +or +.Fa name2 +argument is not an absolute path and +.Fa fd1 +or +.Fa fd2 , +respectively, is neither +.Dv AT_FDCWD +nor a file descriptor associated with a directory. +.El .Sh SEE ALSO .Xr readlink 2 , .Xr symlink 2 , @@ -162,6 +235,10 @@ A .Fn link function call appeared in .At v7 . +The +.Fn linkat +system call appeared in +.Dx 3.1 . 
.Pp The .Fn link -- 2.41.0 From c0a7429cb21e62029fac531a2089569b7cfbedd3 Mon Sep 17 00:00:00 2001 From: Alex Hornung Date: Mon, 30 Jul 2012 15:58:05 +0000 Subject: [PATCH 09/16] opendir(3) - use O_DIRECTORY --- lib/libc/gen/opendir.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lib/libc/gen/opendir.c b/lib/libc/gen/opendir.c index 04374081a1..e95a9643f2 100644 --- a/lib/libc/gen/opendir.c +++ b/lib/libc/gen/opendir.c @@ -83,12 +83,8 @@ __opendir2(const char *name, int flags) */ if (stat(name, &statb) != 0) return (NULL); - if (!S_ISDIR(statb.st_mode)) { - errno = ENOTDIR; - return (NULL); - } - if ((fd = _open(name, O_RDONLY | O_NONBLOCK)) == -1) + if ((fd = _open(name, O_RDONLY | O_NONBLOCK | O_DIRECTORY)) == -1) return (NULL); dirp = __fdopendir2(fd, flags); if (dirp == NULL) { -- 2.41.0 From 8a7990ec4818dfd56034bd7b8a8c35e40ca7a3c6 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Mon, 30 Jul 2012 18:28:00 +0200 Subject: [PATCH 10/16] <sys/fcntl.h>: Mark O_DIRECTORY as being new in POSIX.1-2008. It is also visible in our default environment, just not if the code explicitly requests an older POSIX version.
--- sys/sys/fcntl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 03e657b777..5f2b1908e8 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -37,7 +37,6 @@ * * @(#)fcntl.h 8.3 (Berkeley) 1/21/94 * $FreeBSD: src/sys/sys/fcntl.h,v 1.9.2.2 2001/06/03 05:00:10 dillon Exp $ - * $DragonFly: src/sys/sys/fcntl.h,v 1.14 2008/02/12 20:00:38 corecode Exp $ */ #ifndef _SYS_FCNTL_H_ @@ -107,7 +106,9 @@ #define O_FBUFFERED 0x02000000 /* force buffered I/O */ #define O_MAPONREAD 0x04000000 /* memory map read buffer */ +#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200809 #define O_DIRECTORY 0x08000000 /* error if not a directory */ +#endif #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) #define FREVOKED 0x10000000 /* revoked by fdrevoke() */ -- 2.41.0 From 1dc13257a62ef79987eea8072020520b1d6499d3 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Mon, 30 Jul 2012 19:51:07 +0200 Subject: [PATCH 11/16] kdump(1)/truss(1): Fix occasional quickworld breakage. Whenever we messed with our ioctls or certain definitions (like fcntl(2)'s O_* flags), kdump and truss could break quickworld because their generated ioctl.c and kdump_subr.c files in /usr/obj would still have removed stuff. Fix this by regenerating them every time, no matter what. --- usr.bin/kdump/Makefile | 5 ++--- usr.bin/truss/Makefile | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/usr.bin/kdump/Makefile b/usr.bin/kdump/Makefile index 84ad94eff8..02679a9901 100644 --- a/usr.bin/kdump/Makefile +++ b/usr.bin/kdump/Makefile @@ -1,6 +1,5 @@ # @(#)Makefile 8.1 (Berkeley) 6/6/93 # $FreeBSD: src/usr.bin/kdump/Makefile,v 1.12 2006/05/20 14:27:22 netchild Exp $ -# $DragonFly: src/usr.bin/kdump/Makefile,v 1.7 2008/01/01 14:09:02 matthias Exp $ .PATH: ${.CURDIR}/../ktrace @@ -10,10 +9,10 @@ CFLAGS+= -I${.CURDIR}/../ktrace -I${.CURDIR} -I${.CURDIR}/../.. -I${.CURDIR}/../ CLEANFILES= ioctl.c kdump_subr.c -ioctl.c: mkioctls +ioctl.c! 
sh ${.CURDIR}/mkioctls ${DESTDIR}/usr/include > ${.TARGET} -kdump_subr.c: mksubr +kdump_subr.c! sh ${.CURDIR}/mksubr ${DESTDIR}/usr/include > ${.TARGET} .include diff --git a/usr.bin/truss/Makefile b/usr.bin/truss/Makefile index 23132cc2f0..ac91c4bb90 100644 --- a/usr.bin/truss/Makefile +++ b/usr.bin/truss/Makefile @@ -1,5 +1,4 @@ # $FreeBSD: src/usr.bin/truss/Makefile,v 1.10.2.2 2002/07/23 09:18:30 ru Exp $ -# $DragonFly: src/usr.bin/truss/Makefile,v 1.8 2007/08/27 16:51:00 pavalos Exp $ PROG= truss SRCS= main.c setup.c syscalls.c syscalls.h ioctl.c ${MACHINE_ARCH}-fbsd.c @@ -28,7 +27,7 @@ syscalls.h: syscalls.master /bin/sh ${.CURDIR}/../../sys/kern/makesyscalls.sh syscalls.master \ ${.CURDIR}/i386.conf -ioctl.c: ${.CURDIR}/../kdump/mkioctls +ioctl.c! sh ${.CURDIR}/../kdump/mkioctls ${DESTDIR}/usr/include > ${.TARGET} .include -- 2.41.0 From 6e4ea98e8a4f403762582bd657180d3bd3636505 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fran=C3=A7ois=20Tigeot?= Date: Mon, 30 Jul 2012 22:39:46 +0200 Subject: [PATCH 12/16] kernel: Implement O_CLOEXEC * Using fcntl(2) just after open(2) is not enough to avoid race conditions in programs doing fork+exec sequences. Child processes may be created before fcntl() is run and inherit the parent's file descriptors. * In some circumstances this behavior may even create security issues. * O_CLOEXEC can be used to atomically set the close-on-exec flag for new file descriptors, avoiding the whole mess in the first place.
* Fixes issue #2356 Inspired-from: NetBSD --- lib/libc/sys/open.2 | 18 +++++++++++++++--- sys/kern/vfs_syscalls.c | 12 ++++++++---- sys/sys/fcntl.h | 3 +++ 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2 index 63eb916d95..99aa306246 100644 --- a/lib/libc/sys/open.2 +++ b/lib/libc/sys/open.2 @@ -120,6 +120,7 @@ O_DIRECT eliminate or reduce cache effects O_FSYNC synchronous writes O_NOFOLLOW do not follow symlinks O_DIRECTORY error if file is not a directory +O_CLOEXEC set FD_CLOEXEC upon open .Ed .Pp Opening a file with @@ -201,6 +202,11 @@ from opening files which are even unsafe to open with .Dv O_RDONLY , such as device nodes. .Pp +.Dv O_CLOEXEC +may be used to atomically set the +.Dv FD_CLOEXEC +flag for the newly returned file descriptor. +.Pp If successful, .Fn open and @@ -213,12 +219,18 @@ file is set to the beginning of the file. When a new file is created it is given the group of the directory which contains it. .Pp -The new descriptor is set to remain open across +Unless +.Dv +O_CLOEXEC +was specified, +the new descriptor is set to remain open across .Xr execve 2 system calls; see -.Xr close 2 +.Xr close 2 , +.Xr fcntl 2 and -.Xr fcntl 2 . +.Dv O_CLOEXEC +description. .Pp The system imposes a limit on the number of file descriptors open simultaneously by one process. 
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 4e9fe492aa..7d39c666d4 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1811,7 +1811,7 @@ kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) struct file *nfp; struct file *fp; struct vnode *vp; - int type, indx, error; + int type, indx, error = 0; struct flock lf; if ((oflags & O_ACCMODE) == O_ACCMODE) @@ -1930,7 +1930,9 @@ kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) fsetfd(fdp, fp, indx); fdrop(fp); *res = indx; - return (0); + if (oflags & O_CLOEXEC) + error = fsetfdflags(fdp, *res, UF_EXCLOSE); + return (error); } /* @@ -4157,7 +4159,7 @@ sys_fhopen(struct fhopen_args *uap) struct vattr vat; struct vattr *vap = &vat; struct flock lf; - int fmode, mode, error, type; + int fmode, mode, error = 0, type; struct file *nfp; struct file *fp; int indx; @@ -4308,7 +4310,9 @@ sys_fhopen(struct fhopen_args *uap) fsetfd(fdp, fp, indx); fdrop(fp); uap->sysmsg_result = indx; - return (0); + if (uap->flags & O_CLOEXEC) + error = fsetfdflags(fdp, indx, UF_EXCLOSE); + return (error); bad_drop: fsetfd(fdp, NULL, indx); diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 5f2b1908e8..00fab35afe 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -96,6 +96,9 @@ /* Attempt to bypass the buffer cache */ #define O_DIRECT 0x00010000 +#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200809 +#define O_CLOEXEC 0x00020000 /* atomically set FD_CLOEXEC */ +#endif #define O_FBLOCKING 0x00040000 /* force blocking I/O */ #define O_FNONBLOCKING 0x00080000 /* force non-blocking I/O */ #define O_FAPPEND 0x00100000 /* force append mode for write */ -- 2.41.0 From be417588557b24a8bd8dbf371bc134fcd83c7f99 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fran=C3=A7ois=20Tigeot?= Date: Tue, 31 Jul 2012 10:32:24 +0200 Subject: [PATCH 13/16] opendir(3): use O_CLOEXEC --- lib/libc/gen/opendir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/libc/gen/opendir.c 
b/lib/libc/gen/opendir.c index e95a9643f2..888fd5202a 100644 --- a/lib/libc/gen/opendir.c +++ b/lib/libc/gen/opendir.c @@ -31,7 +31,6 @@ * SUCH DAMAGE. * * $FreeBSD: src/lib/libc/gen/opendir.c,v 1.10.2.1 2001/06/04 20:59:48 joerg Exp $ - * $DragonFly: src/lib/libc/gen/opendir.c,v 1.6 2005/11/13 00:07:42 swildner Exp $ * * @(#)opendir.c 8.8 (Berkeley) 5/1/95 */ @@ -84,7 +83,8 @@ __opendir2(const char *name, int flags) if (stat(name, &statb) != 0) return (NULL); - if ((fd = _open(name, O_RDONLY | O_NONBLOCK | O_DIRECTORY)) == -1) + fd = _open(name, O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_CLOEXEC); + if (fd == -1) return (NULL); dirp = __fdopendir2(fd, flags); if (dirp == NULL) { -- 2.41.0 From 69647051b5f8c8d77e2d598952869565b8ec9f43 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Tue, 31 Jul 2012 13:39:05 +0200 Subject: [PATCH 14/16] LINT/LINT64: Add BNX_TSO_DEBUG. --- sys/config/LINT | 1 + sys/config/LINT64 | 1 + 2 files changed, 2 insertions(+) diff --git a/sys/config/LINT b/sys/config/LINT index 45f958ffee..d661e04eac 100644 --- a/sys/config/LINT +++ b/sys/config/LINT @@ -2495,6 +2495,7 @@ options DEBUG_CRIT_SECTIONS options DEBUG_INTERRUPTS #options DISABLE_PSE options BCE_DEBUG +options BNX_TSO_DEBUG options EMX_RSS_DEBUG options JME_RSS_DEBUG options IGB_RSS_DEBUG diff --git a/sys/config/LINT64 b/sys/config/LINT64 index fd3017a01e..e24a3dcd8a 100644 --- a/sys/config/LINT64 +++ b/sys/config/LINT64 @@ -2258,6 +2258,7 @@ options DEBUG_CRIT_SECTIONS options DEBUG_INTERRUPTS #options DISABLE_PSE options BCE_DEBUG +options BNX_TSO_DEBUG options EMX_RSS_DEBUG options JME_RSS_DEBUG options IGB_RSS_DEBUG -- 2.41.0 From c4a0768d27df927986fd5cc101dae2eaff8af6e2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fran=C3=A7ois=20Tigeot?= Date: Tue, 31 Jul 2012 15:19:57 +0200 Subject: [PATCH 15/16] ixgbe: remove some FreeBSD version checks --- sys/dev/netif/ixgbe/ixgbe.h | 27 +-------------------------- sys/dev/netif/ixgbe/ixgbe_osdep.h | 6 ------ 2 files changed, 1 insertion(+), 
32 deletions(-) diff --git a/sys/dev/netif/ixgbe/ixgbe.h b/sys/dev/netif/ixgbe/ixgbe.h index 0ea2d13845..f51014749e 100644 --- a/sys/dev/netif/ixgbe/ixgbe.h +++ b/sys/dev/netif/ixgbe/ixgbe.h @@ -39,9 +39,6 @@ #include #include -#if __FreeBSD_version >= 800000 -#include -#endif #include #include #include @@ -196,16 +193,7 @@ #define IXGBE_QUEUE_DEPLETED 8 /* Offload bits in mbuf flag */ -#if __FreeBSD_version >= 800000 -#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) -#else #define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP) -#endif - -/* For 6.X code compatibility */ -#if !defined(ETHER_BPF_MTAP) -#define ETHER_BPF_MTAP BPF_MTAP -#endif /* * Interrupt Moderation parameters @@ -297,7 +285,7 @@ struct tx_ring { u32 txd_cmd; bus_dma_tag_t txtag; char lock_name[16]; -#if __FreeBSD_version >= 800000 +#ifdef IFNET_BUF_RING struct buf_ring *br; #endif #ifdef IXGBE_FDIR @@ -502,19 +490,6 @@ ixgbe_is_sfp(struct ixgbe_hw *hw) } } -/* Workaround to make 8.0 buildable */ -#if __FreeBSD_version >= 800000 && __FreeBSD_version < 800504 -static __inline int -drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br) -{ -#ifdef ALTQ - if (ALTQ_IS_ENABLED(&ifp->if_snd)) - return (1); -#endif - return (!buf_ring_empty(br)); -} -#endif - /* ** Find the number of unrefreshed RX descriptors */ diff --git a/sys/dev/netif/ixgbe/ixgbe_osdep.h b/sys/dev/netif/ixgbe/ixgbe_osdep.h index afc5068a83..f0f505fc34 100644 --- a/sys/dev/netif/ixgbe/ixgbe_osdep.h +++ b/sys/dev/netif/ixgbe/ixgbe_osdep.h @@ -110,17 +110,11 @@ typedef boolean_t bool; #define le16_to_cpu -#if __FreeBSD_version < 800000 #if defined(__i386__) || defined(__amd64__) -#define mb() __asm volatile("mfence" ::: "memory") #define wmb() __asm volatile("sfence" ::: "memory") -#define rmb() __asm volatile("lfence" ::: "memory") #else -#define mb() -#define rmb() #define wmb() #endif -#endif #if defined(__i386__) || defined(__amd64__) static __inline -- 2.41.0 From 7df3633519a8fe6eae96af59ff4d6d40e18c7881 Mon Sep 17 00:00:00 
2001 From: Sepherosa Ziehau Date: Wed, 1 Aug 2012 10:35:06 +0800 Subject: [PATCH 16/16] mbuf: Save link layer, IP and TCP/UDP header length This could ease most drivers' TSO operation and avoid extra data area accesses during TSO setup. This could also help Intel's 1000M/10G drivers' hardware checksum offloading, which requires protocol header length. --- sys/net/if_ethersubr.c | 1 + sys/net/pf/pf.c | 1 + sys/net/vlan/if_vlan_ether.c | 1 + sys/netinet/ip_output.c | 2 ++ sys/netinet/tcp_output.c | 4 +++- sys/netinet/tcp_subr.c | 1 + sys/netinet/tcp_syncache.c | 1 + sys/netinet/udp_usrreq.c | 1 + sys/sys/mbuf.h | 5 +++++ 9 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 256bfa1138..6923356ab9 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -220,6 +220,7 @@ ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, M_PREPEND(m, sizeof(struct ether_header), MB_DONTWAIT); if (m == NULL) return (ENOBUFS); + m->m_pkthdr.csum_lhlen = sizeof(struct ether_header); eh = mtod(m, struct ether_header *); edst = eh->ether_dhost; diff --git a/sys/net/pf/pf.c b/sys/net/pf/pf.c index 1fb85c1f13..4bcaed3743 100644 --- a/sys/net/pf/pf.c +++ b/sys/net/pf/pf.c @@ -5537,6 +5537,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, sw_csum &= ~CSUM_DELAY_DATA; } m0->m_pkthdr.csum_flags &= ifp->if_hwassist; + m0->m_pkthdr.csum_iphlen = (ip->ip_hl << 2); if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT && diff --git a/sys/net/vlan/if_vlan_ether.c b/sys/net/vlan/if_vlan_ether.c index ace6e41368..a77985284b 100644 --- a/sys/net/vlan/if_vlan_ether.c +++ b/sys/net/vlan/if_vlan_ether.c @@ -99,6 +99,7 @@ vlan_start_dispatch(netmsg_t msg) if_printf(ifp, "vlan%u m_pullup failed", vlantag); goto back; } + m->m_pkthdr.csum_lhlen = sizeof(struct ether_vlan_header); /* * Transform the Ethernet header into an Ethernet header diff --git
a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 042a9b4028..5ccbb60302 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -922,6 +922,7 @@ pass: } else { sw_csum = 0; } + m->m_pkthdr.csum_iphlen = hlen; /* * If small enough for interface, or the interface will take @@ -1188,6 +1189,7 @@ smart_frag_failure: m->m_pkthdr.len = mhlen + len; m->m_pkthdr.rcvif = NULL; m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; + m->m_pkthdr.csum_iphlen = mhlen; mhip->ip_off = htons(mhip->ip_off); mhip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 6c9c5aa975..994620397d 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1023,12 +1023,14 @@ after_th: sizeof(struct ip6_hdr), sizeof(struct tcphdr) + optlen + len); } else { - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + m->m_pkthdr.csum_thlen = sizeof(struct tcphdr) + optlen; if (use_tso) { m->m_pkthdr.csum_flags = CSUM_TSO; m->m_pkthdr.segsz = segsz; } else { m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = + offsetof(struct tcphdr, th_sum); if (len + optlen) { th->th_sum = in_addword(th->th_sum, htons((u_short)(optlen + len))); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 6383fff37b..9a88fc732c 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -662,6 +662,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + m->m_pkthdr.csum_thlen = sizeof(struct tcphdr); } #ifdef TCPDEBUG if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 5eeaaa6d3b..5dbac2b35f 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1323,6 +1323,7 @@ no_options: htons(tlen - hlen + IPPROTO_TCP)); 
m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + m->m_pkthdr.csum_thlen = sizeof(struct tcphdr) + optlen; error = ip_output(m, sc->sc_ipopts, &sc->sc_route, IP_DEBUGROUTE, NULL, sc->sc_tp->t_inpcb); } diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index e5547ddc6d..71609586fb 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -895,6 +895,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *dstaddr, htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + m->m_pkthdr.csum_thlen = sizeof(struct udphdr); } else { ui->ui_sum = 0; } diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 2040ff1eda..74c5ff885b 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -162,6 +162,11 @@ struct pkthdr { /* variables for hardware checksum */ int csum_flags; /* flags regarding checksum */ int csum_data; /* data field used by csum routines */ + uint16_t csum_iphlen; /* IP header length */ + /* valid if CSUM IP|UDP|TCP|TSO */ + uint8_t csum_thlen; /* TCP/UDP header length */ + /* valid if CSUM UDP|TCP|TSO */ + uint8_t csum_lhlen; /* link header length */ /* firewall flags */ uint32_t fw_flags; /* flags for PF */ -- 2.41.0