2 * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: socket.c,v 1.207.2.19.2.22 2005/11/03 23:08:42 marka Exp $ */
22 #include <sys/param.h>
23 #include <sys/types.h>
24 #include <sys/socket.h>
35 #include <isc/buffer.h>
36 #include <isc/bufferlist.h>
37 #include <isc/condition.h>
38 #include <isc/formatcheck.h>
43 #include <isc/mutex.h>
45 #include <isc/platform.h>
46 #include <isc/print.h>
47 #include <isc/region.h>
48 #include <isc/socket.h>
49 #include <isc/strerror.h>
51 #include <isc/thread.h>
54 #include "errno2result.h"
56 #ifndef ISC_PLATFORM_USETHREADS
58 #endif /* ISC_PLATFORM_USETHREADS */
61 * Some systems define the socket length argument as an int, some as size_t,
62 * some as socklen_t. This is here so it can be easily changed if needed.
64 #ifndef ISC_SOCKADDR_LEN_T
65 #define ISC_SOCKADDR_LEN_T unsigned int
69 * Define what the possible "soft" errors can be. These are non-fatal returns
70 * of various network related functions, like recv() and so on.
72 * For some reason, BSDI (and perhaps others) will sometimes return <0
73 * from recv() but will have errno==0. This is broken, but we have to
74 * work around it here.
/*
 * SOFT_ERROR(e): true for errno values that mean "retry later" rather than
 * "fail the operation".  NOTE(review): this view is a sampled dump; the
 * remaining alternatives of the macro (e.g. EINTR, errno==0) are on lines
 * not visible here -- confirm against the full file.
 */
76 #define SOFT_ERROR(e) ((e) == EAGAIN || \
77 (e) == EWOULDBLOCK || \
/*
 * DLVL(x) expands to the (category, module, level) triple expected by the
 * isc_log_*() calls below, at debug level x.
 */
81 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
84 * DLVL(90) -- Function entry/exit and other tracing.
85 * DLVL(70) -- Socket "correctness" -- including returning of events, etc.
86 * DLVL(60) -- Socket data send/receive
87 * DLVL(50) -- Event tracing, including receiving/sending completion events.
88 * DLVL(20) -- Socket creation/destruction.
90 #define TRACE_LEVEL 90
91 #define CORRECTNESS_LEVEL 70
92 #define IOEVENT_LEVEL 60
93 #define EVENT_LEVEL 50
94 #define CREATION_LEVEL 20
/* Shorthand triples used as the first three arguments to socket_log(). */
96 #define TRACE DLVL(TRACE_LEVEL)
97 #define CORRECTNESS DLVL(CORRECTNESS_LEVEL)
98 #define IOEVENT DLVL(IOEVENT_LEVEL)
99 #define EVENT DLVL(EVENT_LEVEL)
100 #define CREATION DLVL(CREATION_LEVEL)
/* Internal (readable/writable) events reuse the generic event structure. */
102 typedef isc_event_t intev_t;
104 #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o')
105 #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC)
108 * IPv6 control information. If the socket is an IPv6 socket we want
109 * to collect the destination address and interface so the client can
110 * set them on outgoing packets.
112 #ifdef ISC_PLATFORM_HAVEIPV6
119 * NetBSD and FreeBSD can timestamp packets. XXXMLG Should we have
120 * a setsockopt() like interface to request timestamps, and if the OS
121 * doesn't do it for us, call gettimeofday() on every UDP receive?
130 * The number of times a send operation is repeated if the result is EINTR.
/*
 * struct isc_socket (fields below; the struct header is on a line not
 * visible in this sampled view).  One instance per managed descriptor.
 */
137 isc_socketmgr_t *manager;
139 isc_sockettype_t type;
141 /* Locked by socket lock. */
142 ISC_LINK(isc_socket_t) link;
143 unsigned int references;
/* Queues of pending application I/O requests, serviced FIFO. */
147 ISC_LIST(isc_socketevent_t) send_list;
148 ISC_LIST(isc_socketevent_t) recv_list;
149 ISC_LIST(isc_socket_newconnev_t) accept_list;
150 isc_socket_connev_t *connect_ev;
153 * Internal events. Posted when a descriptor is readable or
154 * writable. These are statically allocated and never freed.
155 * They will be set to non-purgable before use.
160 isc_sockaddr_t address; /* remote address */
/* State bitfields; "pending_*" mean an internal event is in flight. */
162 unsigned int pending_recv : 1,
165 listener : 1, /* listener socket */
167 connecting : 1, /* connect pending */
168 bound : 1; /* bound to local addr */
170 #ifdef ISC_NET_RECVOVERFLOW
171 unsigned char overflow; /* used for MSG_TRUNC fake */
/* Pre-sized ancillary-data (cmsg) buffers; sized in allocate_socket(). */
175 ISC_SOCKADDR_LEN_T recvcmsgbuflen;
177 ISC_SOCKADDR_LEN_T sendcmsgbuflen;
180 #define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g')
181 #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
/*
 * The socket manager: owns the select() loop state and the table of
 * sockets indexed by file descriptor (bounded by FD_SETSIZE).
 */
183 struct isc_socketmgr {
188 /* Locked by manager lock. */
189 ISC_LIST(isc_socket_t) socklist;
/* fds[fd] maps a descriptor back to its socket; fdstate[fd] holds
 * CLOSED/MANAGED/CLOSE_PENDING (see constants below). */
192 isc_socket_t *fds[FD_SETSIZE];
193 int fdstate[FD_SETSIZE];
195 #ifdef ISC_PLATFORM_USETHREADS
196 isc_thread_t watcher;
/* Signalled by destroy() when the last socket leaves socklist. */
197 isc_condition_t shutdown_ok;
199 #else /* ISC_PLATFORM_USETHREADS */
201 #endif /* ISC_PLATFORM_USETHREADS */
/* In the non-threaded build there is exactly one global manager. */
204 #ifndef ISC_PLATFORM_USETHREADS
205 static isc_socketmgr_t *socketmgr = NULL;
206 #endif /* ISC_PLATFORM_USETHREADS */
/* fdstate[] values.  CLOSED must be zero so a freshly zeroed table is valid. */
208 #define CLOSED 0 /* this one must be zero */
210 #define CLOSE_PENDING 2
213 * send() and recv() iovec counts
215 #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)
216 #ifdef ISC_NET_RECVOVERFLOW
/* One extra iovec is reserved for the 1-byte UDP overflow probe. */
217 # define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER + 1)
219 # define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
/* Forward declarations for the static helpers defined later in this file. */
222 static void send_recvdone_event(isc_socket_t *, isc_socketevent_t **);
223 static void send_senddone_event(isc_socket_t *, isc_socketevent_t **);
224 static void free_socket(isc_socket_t **);
225 static isc_result_t allocate_socket(isc_socketmgr_t *, isc_sockettype_t,
227 static void destroy(isc_socket_t **);
228 static void internal_accept(isc_task_t *, isc_event_t *);
229 static void internal_connect(isc_task_t *, isc_event_t *);
230 static void internal_recv(isc_task_t *, isc_event_t *);
231 static void internal_send(isc_task_t *, isc_event_t *);
232 static void process_cmsg(isc_socket_t *, struct msghdr *, isc_socketevent_t *);
233 static void build_msghdr_send(isc_socket_t *, isc_socketevent_t *,
234 struct msghdr *, struct iovec *, size_t *);
235 static void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *,
236 struct msghdr *, struct iovec *, size_t *);
/*
 * Messages written down the watcher pipe (threaded build) or handled
 * directly (non-threaded).  Negative values are commands; a value >= 0
 * is a file descriptor to act upon.
 */
238 #define SELECT_POKE_SHUTDOWN (-1)
239 #define SELECT_POKE_NOTHING (-2)
240 #define SELECT_POKE_READ (-3)
241 #define SELECT_POKE_ACCEPT (-3) /* Same as _READ */
242 #define SELECT_POKE_WRITE (-4)
243 #define SELECT_POKE_CONNECT (-4) /* Same as _WRITE */
244 #define SELECT_POKE_CLOSE (-5)
246 #define SOCK_DEAD(s) ((s)->references == 0)
/*
 * manager_log(): printf-style logging helper; prefixes the formatted
 * message with the manager pointer.  Returns early if the log context
 * would discard messages at 'level'.  (Sampled view: the va_start/va_end
 * lines are not visible here.)
 */
249 manager_log(isc_socketmgr_t *sockmgr,
250 isc_logcategory_t *category, isc_logmodule_t *module, int level,
251 const char *fmt, ...) ISC_FORMAT_PRINTF(5, 6);
253 manager_log(isc_socketmgr_t *sockmgr,
254 isc_logcategory_t *category, isc_logmodule_t *module, int level,
255 const char *fmt, ...)
/* Cheap early-out: skip the vsnprintf when nothing would be logged. */
260 if (! isc_log_wouldlog(isc_lctx, level))
264 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
267 isc_log_write(isc_lctx, category, module, level,
268 "sockmgr %p: %s", sockmgr, msgbuf);
/*
 * socket_log(): like manager_log() but tags the message with the socket
 * pointer and, when 'address' is non-NULL, the formatted peer address.
 * Uses the message catalog (isc_log_iwrite) for localizable output.
 */
272 socket_log(isc_socket_t *sock, isc_sockaddr_t *address,
273 isc_logcategory_t *category, isc_logmodule_t *module, int level,
274 isc_msgcat_t *msgcat, int msgset, int message,
275 const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);
277 socket_log(isc_socket_t *sock, isc_sockaddr_t *address,
278 isc_logcategory_t *category, isc_logmodule_t *module, int level,
279 isc_msgcat_t *msgcat, int msgset, int message,
280 const char *fmt, ...)
283 char peerbuf[ISC_SOCKADDR_FORMATSIZE];
/* Cheap early-out: skip formatting when nothing would be logged. */
286 if (! isc_log_wouldlog(isc_lctx, level))
290 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
293 if (address == NULL) {
294 isc_log_iwrite(isc_lctx, category, module, level,
295 msgcat, msgset, message,
296 "socket %p: %s", sock, msgbuf);
/* else branch: include the human-readable peer address. */
298 isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
299 isc_log_iwrite(isc_lctx, category, module, level,
300 msgcat, msgset, message,
301 "socket %p %s: %s", sock, peerbuf, msgbuf);
/*
 * wakeup_socket(): apply a SELECT_POKE_* message to descriptor 'fd'.
 * A pending close clears the fd from both select() sets and marks it
 * CLOSED; otherwise, for a MANAGED fd, the read or write fd_set bit is
 * turned on so the watcher starts monitoring it.
 */
306 wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) {
310 * This is a wakeup on a socket. If the socket is not in the
311 * process of being closed, start watching it for either reads
/* fd indexes fds[]/fdstate[]/fd_set, so it must lie within FD_SETSIZE. */
315 INSIST(fd >= 0 && fd < (int)FD_SETSIZE);
317 if (manager->fdstate[fd] == CLOSE_PENDING) {
318 manager->fdstate[fd] = CLOSED;
319 FD_CLR(fd, &manager->read_fds);
320 FD_CLR(fd, &manager->write_fds);
/* Ignore pokes for descriptors we are not managing. */
324 if (manager->fdstate[fd] != MANAGED)
327 sock = manager->fds[fd];
332 if (msg == SELECT_POKE_READ)
333 FD_SET(sock->fd, &manager->read_fds);
334 if (msg == SELECT_POKE_WRITE)
335 FD_SET(sock->fd, &manager->write_fds);
338 #ifdef ISC_PLATFORM_USETHREADS
340 * Poke the select loop when there is something for us to do.
341 * The write is required (by POSIX) to complete. That is, we
342 * will not get partial writes.
/*
 * select_poke() (threaded build): send (fd, msg) down the internal pipe
 * so the watcher thread wakes out of select().  Retries on soft errors;
 * any hard write failure is fatal.
 */
345 select_poke(isc_socketmgr_t *mgr, int fd, int msg) {
348 char strbuf[ISC_STRERRORSIZE];
354 cc = write(mgr->pipe_fds[1], buf, sizeof(buf));
357 * Treat ENOSR as EAGAIN but loop slowly as it is
358 * unlikely to clear fast.
360 if (cc < 0 && errno == ENOSR) {
365 } while (cc < 0 && SOFT_ERROR(errno));
/* Non-soft error: log the strerror text and abort the process. */
368 isc__strerror(errno, strbuf, sizeof(strbuf));
369 FATAL_ERROR(__FILE__, __LINE__,
370 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
373 "during watcher poke: %s"),
/* Pipe writes of this size are atomic, so no partial write occurs. */
377 INSIST(cc == sizeof(buf));
381 * Read a message on the internal fd.
/*
 * select_readmsg(): watcher-side counterpart of select_poke(); reads one
 * (fd, msg) pair from the pipe.  On a soft error it reports
 * SELECT_POKE_NOTHING so the caller simply retries later.
 */
384 select_readmsg(isc_socketmgr_t *mgr, int *fd, int *msg) {
387 char strbuf[ISC_STRERRORSIZE];
389 cc = read(mgr->pipe_fds[0], buf, sizeof(buf));
391 *msg = SELECT_POKE_NOTHING;
392 *fd = -1; /* Silence compiler. */
393 if (SOFT_ERROR(errno))
/* Hard read failure on our own pipe is unrecoverable. */
396 isc__strerror(errno, strbuf, sizeof(strbuf));
397 FATAL_ERROR(__FILE__, __LINE__,
398 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
401 "during watcher poke: %s"),
406 INSIST(cc == sizeof(buf));
411 #else /* ISC_PLATFORM_USETHREADS */
413 * Update the state of the socketmgr when something changes.
/*
 * select_poke() (non-threaded build): no pipe or watcher thread exists,
 * so apply the state change directly via wakeup_socket().  Shutdown
 * pokes are handled separately (handler not visible in this view).
 */
416 select_poke(isc_socketmgr_t *manager, int fd, int msg) {
417 if (msg == SELECT_POKE_SHUTDOWN)
420 wakeup_socket(manager, fd, msg);
423 #endif /* ISC_PLATFORM_USETHREADS */
426 * Make a fd non-blocking.
/*
 * make_nonblock(): set O_NONBLOCK on 'fd', using FIONBIO ioctl() where
 * fcntl() is unreliable (USE_FIONBIO_IOCTL platforms).  Returns
 * ISC_R_SUCCESS or ISC_R_UNEXPECTED (after logging the errno text).
 */
429 make_nonblock(int fd) {
432 char strbuf[ISC_STRERRORSIZE];
433 #ifdef USE_FIONBIO_IOCTL
436 ret = ioctl(fd, FIONBIO, (char *)&on);
/* fcntl path: read-modify-write the flags so other bits are kept. */
438 flags = fcntl(fd, F_GETFL, 0);
439 flags |= PORT_NONBLOCK;
440 ret = fcntl(fd, F_SETFL, flags);
444 isc__strerror(errno, strbuf, sizeof(strbuf));
445 UNEXPECTED_ERROR(__FILE__, __LINE__,
446 #ifdef USE_FIONBIO_IOCTL
447 "ioctl(%d, FIONBIO, &on): %s", fd,
449 "fcntl(%d, F_SETFL, %d): %s", fd, flags,
453 return (ISC_R_UNEXPECTED);
456 return (ISC_R_SUCCESS);
461 * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE.
462 * In order to ensure as much portability as possible, we provide wrapper
463 * functions of these macros.
464 * Note that cmsg_space() could run slow on OSes that do not have
/*
 * cmsg_len(): portable CMSG_LEN() -- bytes for a cmsghdr plus 'len'
 * bytes of data.  Falls back to computing the header size from the
 * offset CMSG_DATA() yields on a NULL header.
 */
467 static inline ISC_SOCKADDR_LEN_T
468 cmsg_len(ISC_SOCKADDR_LEN_T len) {
470 return (CMSG_LEN(len));
472 ISC_SOCKADDR_LEN_T hdrlen;
475 * Cast NULL so that any pointer arithmetic performed by CMSG_DATA
/* NOTE(review): CMSG_DATA on a NULL pointer relies on the macro doing
 * only offset arithmetic -- historical BSD idiom, UB by the letter. */
478 hdrlen = (ISC_SOCKADDR_LEN_T)CMSG_DATA(((struct cmsghdr *)NULL));
479 return (hdrlen + len);
/*
 * cmsg_space(): portable CMSG_SPACE() -- total (aligned) bytes a control
 * message of data length 'len' occupies.  The fallback builds a dummy
 * msghdr and asks CMSG_NXTHDR where the next header would start.
 */
483 static inline ISC_SOCKADDR_LEN_T
484 cmsg_space(ISC_SOCKADDR_LEN_T len) {
486 return (CMSG_SPACE(len));
489 struct cmsghdr *cmsgp;
491 * XXX: The buffer length is an ad-hoc value, but should be enough
492 * in a practical sense.
494 char dummybuf[sizeof(struct cmsghdr) + 1024];
496 memset(&msg, 0, sizeof(msg));
497 msg.msg_control = dummybuf;
498 msg.msg_controllen = sizeof(dummybuf);
500 cmsgp = (struct cmsghdr *)dummybuf;
501 cmsgp->cmsg_len = cmsg_len(len);
/* Distance from buffer start to the next header == aligned space. */
503 cmsgp = CMSG_NXTHDR(&msg, cmsgp);
505 return ((char *)cmsgp - (char *)msg.msg_control);
510 #endif /* USE_CMSG */
513 * Process control messages received on a socket.
/*
 * process_cmsg(): walk the ancillary data attached to a received
 * message and record anything interesting on the socket event 'dev':
 * truncation flags, IPv6 packet info (destination interface), and the
 * kernel receive timestamp (SCM_TIMESTAMP), when available.
 */
516 process_cmsg(isc_socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) {
518 struct cmsghdr *cmsgp;
519 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
520 struct in6_pktinfo *pktinfop;
523 struct timeval *timevalp;
528 * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined.
529 * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined.
530 * They are all here, outside of the CPP tests, because it is
531 * more consistent with the usual ISC coding style.
537 #ifdef ISC_NET_BSD44MSGHDR
/* Record data truncation (datagram larger than the supplied iovecs). */
540 if ((msg->msg_flags & MSG_TRUNC) == MSG_TRUNC)
541 dev->attributes |= ISC_SOCKEVENTATTR_TRUNC;
/* Record control-data truncation (cmsg buffer too small). */
545 if ((msg->msg_flags & MSG_CTRUNC) == MSG_CTRUNC)
546 dev->attributes |= ISC_SOCKEVENTATTR_CTRUNC;
/* Nothing more to do when no ancillary data arrived. */
552 if (msg->msg_controllen == 0U || msg->msg_control == NULL)
558 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
562 cmsgp = CMSG_FIRSTHDR(msg);
563 while (cmsgp != NULL) {
564 socket_log(sock, NULL, TRACE,
565 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_PROCESSCMSG,
566 "processing cmsg %p", cmsgp);
568 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
569 if (cmsgp->cmsg_level == IPPROTO_IPV6
570 && cmsgp->cmsg_type == IPV6_PKTINFO) {
/* Copy out via memcpy: CMSG_DATA may not be suitably aligned. */
572 pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp);
573 memcpy(&dev->pktinfo, pktinfop,
574 sizeof(struct in6_pktinfo));
575 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
576 socket_log(sock, NULL, TRACE,
577 isc_msgcat, ISC_MSGSET_SOCKET,
579 "interface received on ifindex %u",
580 dev->pktinfo.ipi6_ifindex);
581 if (IN6_IS_ADDR_MULTICAST(&pktinfop->ipi6_addr))
582 dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST;
/* Kernel receive timestamp -> event timestamp (usec -> nsec). */
588 if (cmsgp->cmsg_level == SOL_SOCKET
589 && cmsgp->cmsg_type == SCM_TIMESTAMP) {
590 timevalp = (struct timeval *)CMSG_DATA(cmsgp);
591 dev->timestamp.seconds = timevalp->tv_sec;
592 dev->timestamp.nanoseconds = timevalp->tv_usec * 1000;
593 dev->attributes |= ISC_SOCKEVENTATTR_TIMESTAMP;
599 cmsgp = CMSG_NXTHDR(msg, cmsgp);
601 #endif /* USE_CMSG */
603 #endif /* ISC_NET_BSD44MSGHDR */
607 * Construct an iov array and attach it to the msghdr passed in. This is
608 * the SEND constructor, which will use the used region of the buffer
609 * (if using a buffer list) or will use the internal region (if a single
610 * buffer I/O is requested).
612 * Nothing can be NULL, and the done event must list at least one buffer
613 * on the buffer linked list for this function to be meaningful.
615 * If write_countp != NULL, *write_countp will hold the number of bytes
616 * this transaction can send.
619 build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
620 struct msghdr *msg, struct iovec *iov, size_t *write_countp)
622 unsigned int iovcount;
623 isc_buffer_t *buffer;
628 memset(msg, 0, sizeof(*msg));
/* UDP sends are addressed per-datagram; TCP uses the connected peer. */
630 if (sock->type == isc_sockettype_udp) {
631 msg->msg_name = (void *)&dev->address.type.sa;
632 msg->msg_namelen = dev->address.length;
634 msg->msg_name = NULL;
635 msg->msg_namelen = 0;
638 buffer = ISC_LIST_HEAD(dev->bufferlist);
643 * Single buffer I/O? Skip what we've done so far in this region.
645 if (buffer == NULL) {
/* dev->n bytes were already sent on a previous partial write. */
646 write_count = dev->region.length - dev->n;
647 iov[0].iov_base = (void *)(dev->region.base + dev->n);
648 iov[0].iov_len = write_count;
656 * Skip the data in the buffer list that we have already written.
659 while (buffer != NULL) {
660 REQUIRE(ISC_BUFFER_VALID(buffer));
661 if (skip_count < isc_buffer_usedlength(buffer))
663 skip_count -= isc_buffer_usedlength(buffer);
664 buffer = ISC_LIST_NEXT(buffer, link);
/* Build one iovec per remaining buffer with unsent data. */
667 while (buffer != NULL) {
668 INSIST(iovcount < MAXSCATTERGATHER_SEND);
670 isc_buffer_usedregion(buffer, &used);
672 if (used.length > 0) {
/* skip_count is nonzero only for the first partially-sent buffer. */
673 iov[iovcount].iov_base = (void *)(used.base
675 iov[iovcount].iov_len = used.length - skip_count;
676 write_count += (used.length - skip_count);
680 buffer = ISC_LIST_NEXT(buffer, link);
683 INSIST(skip_count == 0U);
687 msg->msg_iovlen = iovcount;
689 #ifdef ISC_NET_BSD44MSGHDR
690 msg->msg_control = NULL;
691 msg->msg_controllen = 0;
693 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO)
/* Outgoing UDP with pktinfo: attach IPV6_PKTINFO ancillary data so the
 * reply leaves via the interface/address the query arrived on. */
694 if ((sock->type == isc_sockettype_udp)
695 && ((dev->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0)) {
696 struct cmsghdr *cmsgp;
697 struct in6_pktinfo *pktinfop;
699 socket_log(sock, NULL, TRACE,
700 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_SENDTODATA,
701 "sendto pktinfo data, ifindex %u",
702 dev->pktinfo.ipi6_ifindex);
704 msg->msg_controllen = cmsg_space(sizeof(struct in6_pktinfo));
705 INSIST(msg->msg_controllen <= sock->sendcmsgbuflen);
706 msg->msg_control = (void *)sock->sendcmsgbuf;
708 cmsgp = (struct cmsghdr *)sock->sendcmsgbuf;
709 cmsgp->cmsg_level = IPPROTO_IPV6;
710 cmsgp->cmsg_type = IPV6_PKTINFO;
711 cmsgp->cmsg_len = cmsg_len(sizeof(struct in6_pktinfo));
712 pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp);
713 memcpy(pktinfop, &dev->pktinfo, sizeof(struct in6_pktinfo));
715 #endif /* USE_CMSG && ISC_PLATFORM_HAVEIPV6 */
716 #else /* ISC_NET_BSD44MSGHDR */
/* Pre-4.4BSD msghdr layout: access-rights fields instead of control. */
717 msg->msg_accrights = NULL;
718 msg->msg_accrightslen = 0;
719 #endif /* ISC_NET_BSD44MSGHDR */
721 if (write_countp != NULL)
722 *write_countp = write_count;
726 * Construct an iov array and attach it to the msghdr passed in. This is
727 * the RECV constructor, which will use the avialable region of the buffer
728 * (if using a buffer list) or will use the internal region (if a single
729 * buffer I/O is requested).
731 * Nothing can be NULL, and the done event must list at least one buffer
732 * on the buffer linked list for this function to be meaningful.
734 * If read_countp != NULL, *read_countp will hold the number of bytes
735 * this transaction can receive.
738 build_msghdr_recv(isc_socket_t *sock, isc_socketevent_t *dev,
739 struct msghdr *msg, struct iovec *iov, size_t *read_countp)
741 unsigned int iovcount;
742 isc_buffer_t *buffer;
743 isc_region_t available;
746 memset(msg, 0, sizeof(struct msghdr));
/* UDP: capture the sender's address into dev->address via msg_name. */
748 if (sock->type == isc_sockettype_udp) {
749 memset(&dev->address, 0, sizeof(dev->address));
750 msg->msg_name = (void *)&dev->address.type.sa;
751 msg->msg_namelen = sizeof(dev->address.type);
752 #ifdef ISC_NET_RECVOVERFLOW
753 /* If needed, steal one iovec for overflow detection. */
/* TCP: peer is fixed; reuse the socket's stored remote address. */
757 msg->msg_name = NULL;
758 msg->msg_namelen = 0;
759 dev->address = sock->address;
762 buffer = ISC_LIST_HEAD(dev->bufferlist);
766 * Single buffer I/O? Skip what we've done so far in this region.
768 if (buffer == NULL) {
/* dev->n bytes were already received on a previous partial read. */
769 read_count = dev->region.length - dev->n;
770 iov[0].iov_base = (void *)(dev->region.base + dev->n);
771 iov[0].iov_len = read_count;
779 * Skip empty buffers.
781 while (buffer != NULL) {
782 REQUIRE(ISC_BUFFER_VALID(buffer));
783 if (isc_buffer_availablelength(buffer) != 0)
785 buffer = ISC_LIST_NEXT(buffer, link);
/* One iovec per buffer with free space remaining. */
789 while (buffer != NULL) {
790 INSIST(iovcount < MAXSCATTERGATHER_RECV);
792 isc_buffer_availableregion(buffer, &available);
794 if (available.length > 0) {
795 iov[iovcount].iov_base = (void *)(available.base);
796 iov[iovcount].iov_len = available.length;
797 read_count += available.length;
800 buffer = ISC_LIST_NEXT(buffer, link);
806 * If needed, set up to receive that one extra byte. Note that
807 * we know there is at least one iov left, since we stole it
808 * at the top of this function.
810 #ifdef ISC_NET_RECVOVERFLOW
/* The extra byte lands in sock->overflow, letting doio_recv() detect a
 * UDP datagram larger than the supplied buffers (MSG_TRUNC emulation). */
811 if (sock->type == isc_sockettype_udp) {
812 iov[iovcount].iov_base = (void *)(&sock->overflow);
813 iov[iovcount].iov_len = 1;
819 msg->msg_iovlen = iovcount;
821 #ifdef ISC_NET_BSD44MSGHDR
822 msg->msg_control = NULL;
823 msg->msg_controllen = 0;
825 #if defined(USE_CMSG)
/* UDP receives collect ancillary data (pktinfo/timestamp) here. */
826 if (sock->type == isc_sockettype_udp) {
827 msg->msg_control = sock->recvcmsgbuf;
828 msg->msg_controllen = sock->recvcmsgbuflen;
830 #endif /* USE_CMSG */
831 #else /* ISC_NET_BSD44MSGHDR */
832 msg->msg_accrights = NULL;
833 msg->msg_accrightslen = 0;
834 #endif /* ISC_NET_BSD44MSGHDR */
836 if (read_countp != NULL)
837 *read_countp = read_count;
/*
 * set_dev_address(): record the peer address on a socket event.  For UDP
 * a caller-supplied address (if any) wins; for TCP the address is always
 * the connected peer and no explicit address may be passed.
 */
841 set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
842 isc_socketevent_t *dev)
844 if (sock->type == isc_sockettype_udp) {
846 dev->address = *address;
848 dev->address = sock->address;
849 } else if (sock->type == isc_sockettype_tcp) {
850 INSIST(address == NULL);
851 dev->address = sock->address;
/*
 * allocate_socketevent(): allocate and zero-initialize a socket event of
 * 'eventtype' bound to (action, arg).  Returns NULL on allocation
 * failure (visible in the missing lines of this sampled view, presumably
 * -- TODO confirm against the full file).
 */
855 static isc_socketevent_t *
856 allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype,
857 isc_taskaction_t action, const void *arg)
859 isc_socketevent_t *ev;
861 ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx,
/* Default to "unexpected" so an unset result is detectable. */
869 ev->result = ISC_R_UNEXPECTED;
870 ISC_LINK_INIT(ev, ev_link);
871 ISC_LIST_INIT(ev->bufferlist);
872 ev->region.base = NULL;
880 #if defined(ISC_SOCKET_DEBUG)
/*
 * dump_msg(): debug-only pretty printer for a struct msghdr -- name,
 * iovec table, and (on 4.4BSD msghdr systems) the control buffer.
 * Compiled only under ISC_SOCKET_DEBUG.
 */
882 dump_msg(struct msghdr *msg) {
885 printf("MSGHDR %p\n", msg);
886 printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen);
887 printf("\tiov %p, iovlen %d\n", msg->msg_iov, msg->msg_iovlen);
888 for (i = 0; i < (unsigned int)msg->msg_iovlen; i++)
889 printf("\t\t%d\tbase %p, len %d\n", i,
890 msg->msg_iov[i].iov_base,
891 msg->msg_iov[i].iov_len);
892 #ifdef ISC_NET_BSD44MSGHDR
893 printf("\tcontrol %p, controllen %d\n", msg->msg_control,
894 msg->msg_controllen);
/* Return codes shared by doio_recv()/doio_send(); they tell the caller
 * whether a completion event was already posted and whether to retry. */
899 #define DOIO_SUCCESS 0 /* i/o ok, event sent */
900 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */
901 #define DOIO_HARD 2 /* i/o error, event sent */
902 #define DOIO_EOF 3 /* EOF, no event sent */
/*
 * doio_recv(): perform one recvmsg() for the queued request 'dev'.
 * Classifies errno into DOIO_SOFT / DOIO_HARD, maps a zero-length TCP
 * read to EOF, drops UDP packets with source port zero, processes
 * ancillary data, and advances the buffer list by the bytes received.
 * Returns one of the DOIO_* codes above with dev->result set on
 * completion.
 */
905 doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
907 struct iovec iov[MAXSCATTERGATHER_RECV];
910 struct msghdr msghdr;
911 isc_buffer_t *buffer;
913 char strbuf[ISC_STRERRORSIZE];
915 build_msghdr_recv(sock, dev, &msghdr, iov, &read_count);
917 #if defined(ISC_SOCKET_DEBUG)
921 cc = recvmsg(sock->fd, &msghdr, 0);
/* EAGAIN and friends: leave the request queued and retry later. */
925 if (SOFT_ERROR(recv_errno))
928 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
929 isc__strerror(recv_errno, strbuf, sizeof(strbuf));
930 socket_log(sock, NULL, IOEVENT,
931 isc_msgcat, ISC_MSGSET_SOCKET,
933 "doio_recv: recvmsg(%d) %d bytes, err %d/%s",
934 sock->fd, cc, recv_errno, strbuf);
/*
 * SOFT_OR_HARD: for a connected socket the error is fatal for this
 * request; for an unconnected (UDP) socket it is transient.
 * ALWAYS_HARD: fatal either way.
 */
937 #define SOFT_OR_HARD(_system, _isc) \
938 if (recv_errno == _system) { \
939 if (sock->connected) { \
940 dev->result = _isc; \
941 return (DOIO_HARD); \
943 return (DOIO_SOFT); \
945 #define ALWAYS_HARD(_system, _isc) \
946 if (recv_errno == _system) { \
947 dev->result = _isc; \
948 return (DOIO_HARD); \
951 SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED);
952 SOFT_OR_HARD(ENETUNREACH, ISC_R_NETUNREACH);
953 SOFT_OR_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH);
954 SOFT_OR_HARD(EHOSTDOWN, ISC_R_HOSTDOWN);
955 /* HPUX 11.11 can return EADDRNOTAVAIL. */
956 SOFT_OR_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
957 ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES);
/* Anything unmatched above: translate errno generically. */
962 dev->result = isc__errno2result(recv_errno);
967 * On TCP, zero length reads indicate EOF, while on
968 * UDP, zero length reads are perfectly valid, although
971 if ((sock->type == isc_sockettype_tcp) && (cc == 0))
974 if (sock->type == isc_sockettype_udp) {
975 dev->address.length = msghdr.msg_namelen;
/* Port-zero sources are bogus; silently drop the datagram. */
976 if (isc_sockaddr_getport(&dev->address) == 0) {
977 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
978 socket_log(sock, &dev->address, IOEVENT,
979 isc_msgcat, ISC_MSGSET_SOCKET,
981 "dropping source port zero packet");
987 socket_log(sock, &dev->address, IOEVENT,
988 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_PKTRECV,
989 "packet received correctly");
992 * Overflow bit detection. If we received MORE bytes than we should,
993 * this indicates an overflow situation. Set the flag in the
994 * dev entry and adjust how much we read by one.
996 #ifdef ISC_NET_RECVOVERFLOW
997 if ((sock->type == isc_sockettype_udp) && ((size_t)cc > read_count)) {
998 dev->attributes |= ISC_SOCKEVENTATTR_TRUNC;
1004 * If there are control messages attached, run through them and pull
1005 * out the interesting bits.
1007 if (sock->type == isc_sockettype_udp)
1008 process_cmsg(sock, &msghdr, dev);
1011 * update the buffers (if any) and the i/o count
/* Distribute the received byte count across the buffer list in order. */
1015 buffer = ISC_LIST_HEAD(dev->bufferlist);
1016 while (buffer != NULL && actual_count > 0U) {
1017 REQUIRE(ISC_BUFFER_VALID(buffer));
1018 if (isc_buffer_availablelength(buffer) <= actual_count) {
1019 actual_count -= isc_buffer_availablelength(buffer);
1020 isc_buffer_add(buffer,
1021 isc_buffer_availablelength(buffer));
1023 isc_buffer_add(buffer, actual_count);
1027 buffer = ISC_LIST_NEXT(buffer, link);
1028 if (buffer == NULL) {
1029 INSIST(actual_count == 0U);
1034 * If we read less than we expected, update counters,
1035 * and let the upper layer poke the descriptor.
1037 if (((size_t)cc != read_count) && (dev->n < dev->minimum))
1041 * Full reads are posted, or partials if partials are ok.
1043 dev->result = ISC_R_SUCCESS;
1044 return (DOIO_SUCCESS);
1049 * DOIO_SUCCESS The operation succeeded. dev->result contains
1052 * DOIO_HARD A hard or unexpected I/O error was encountered.
1053 * dev->result contains the appropriate error.
1055 * DOIO_SOFT A soft I/O error was encountered. No senddone
1056 * event was sent. The operation should be retried.
1058 * No other return values are possible.
/*
 * doio_send(): perform one sendmsg() for the queued request 'dev',
 * retrying up to NRETRIES times on EINTR, then classify errno exactly
 * as doio_recv() does (SOFT_OR_HARD/ALWAYS_HARD redefined for
 * send_errno).
 */
1061 doio_send(isc_socket_t *sock, isc_socketevent_t *dev) {
1063 struct iovec iov[MAXSCATTERGATHER_SEND];
1065 struct msghdr msghdr;
1066 char addrbuf[ISC_SOCKADDR_FORMATSIZE];
1069 char strbuf[ISC_STRERRORSIZE];
1071 build_msghdr_send(sock, dev, &msghdr, iov, &write_count);
1074 cc = sendmsg(sock->fd, &msghdr, 0);
1078 * Check for error or block condition.
/* Interrupted by a signal: retry a bounded number of times. */
1081 if (send_errno == EINTR && ++attempts < NRETRIES)
1084 if (SOFT_ERROR(send_errno))
1087 #define SOFT_OR_HARD(_system, _isc) \
1088 if (send_errno == _system) { \
1089 if (sock->connected) { \
1090 dev->result = _isc; \
1091 return (DOIO_HARD); \
1093 return (DOIO_SOFT); \
1095 #define ALWAYS_HARD(_system, _isc) \
1096 if (send_errno == _system) { \
1097 dev->result = _isc; \
1098 return (DOIO_HARD); \
1101 SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED);
1102 ALWAYS_HARD(EACCES, ISC_R_NOPERM);
1103 ALWAYS_HARD(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
1104 ALWAYS_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
1105 ALWAYS_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH);
1107 ALWAYS_HARD(EHOSTDOWN, ISC_R_HOSTUNREACH);
1109 ALWAYS_HARD(ENETUNREACH, ISC_R_NETUNREACH);
1110 ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES);
/* Linux firewalls report EPERM for blocked destinations. */
1111 ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH);
1112 ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED);
1113 ALWAYS_HARD(ECONNRESET, ISC_R_CONNECTIONRESET);
1119 * The other error types depend on whether or not the
1120 * socket is UDP or TCP. If it is UDP, some errors
1121 * that we expect to be fatal under TCP are merely
1122 * annoying, and are really soft errors.
1124 * However, these soft errors are still returned as
1127 isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf));
1128 isc__strerror(send_errno, strbuf, sizeof(strbuf));
1129 UNEXPECTED_ERROR(__FILE__, __LINE__, "internal_send: %s: %s",
1131 dev->result = isc__errno2result(send_errno);
/* A zero-byte send() result is never expected here; log it loudly. */
1136 UNEXPECTED_ERROR(__FILE__, __LINE__,
1137 "internal_send: send() %s 0",
1138 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
1139 ISC_MSG_RETURNED, "returned"));
1142 * If we write less than we expected, update counters, poke.
1145 if ((size_t)cc != write_count)
1149 * Exactly what we wanted to write. We're done with this
1150 * entry. Post its completion event.
1152 dev->result = ISC_R_SUCCESS;
1153 return (DOIO_SUCCESS);
1159 * Caller must ensure that the socket is not locked and no external
/*
 * destroy(): detach the (reference-free) socket from the manager --
 * clear its fd slot, schedule the fd close via the watcher, unlink it
 * from socklist, and wake any thread waiting in manager shutdown when
 * the list empties.  Frees the socket via free_socket() (call not
 * visible in this sampled view -- TODO confirm).
 */
1163 destroy(isc_socket_t **sockp) {
1164 isc_socket_t *sock = *sockp;
1165 isc_socketmgr_t *manager = sock->manager;
1167 socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1168 ISC_MSG_DESTROYING, "destroying");
/* Nothing may still be queued on a socket being destroyed. */
1170 INSIST(ISC_LIST_EMPTY(sock->accept_list));
1171 INSIST(ISC_LIST_EMPTY(sock->recv_list));
1172 INSIST(ISC_LIST_EMPTY(sock->send_list));
1173 INSIST(sock->connect_ev == NULL);
1174 REQUIRE(sock->fd >= 0 && sock->fd < (int)FD_SETSIZE);
1176 LOCK(&manager->lock);
1179 * No one has this socket open, so the watcher doesn't have to be
1180 * poked, and the socket doesn't have to be locked.
1182 manager->fds[sock->fd] = NULL;
/* Actual close() happens in the watcher via the CLOSE poke. */
1183 manager->fdstate[sock->fd] = CLOSE_PENDING;
1184 select_poke(manager, sock->fd, SELECT_POKE_CLOSE);
1185 ISC_LIST_UNLINK(manager->socklist, sock, link);
1187 #ifdef ISC_PLATFORM_USETHREADS
1188 if (ISC_LIST_EMPTY(manager->socklist))
1189 SIGNAL(&manager->shutdown_ok);
1190 #endif /* ISC_PLATFORM_USETHREADS */
1193 * XXX should reset manager->maxfd here
1196 UNLOCK(&manager->lock);
/*
 * allocate_socket(): allocate and initialize an isc_socket_t (no OS
 * descriptor yet -- the caller opens the fd).  Sizes the cmsg buffers
 * for IPv6 pktinfo and, on receive, an optional SO_TIMESTAMP, sets up
 * the empty request queues, the lock, and the internal read/write
 * events.  On failure, unwinds partial allocations (labels not visible
 * in this sampled view) and returns an error code.
 */
1202 allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
1203 isc_socket_t **socketp)
1207 ISC_SOCKADDR_LEN_T cmsgbuflen;
1209 sock = isc_mem_get(manager->mctx, sizeof(*sock));
1212 return (ISC_R_NOMEMORY);
1214 ret = ISC_R_UNEXPECTED;
1217 sock->references = 0;
1219 sock->manager = manager;
1223 ISC_LINK_INIT(sock, link);
1225 sock->recvcmsgbuf = NULL;
1226 sock->sendcmsgbuf = NULL;
1229 * set up cmsg buffers
/* Receive side may carry pktinfo plus a timestamp cmsg. */
1232 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO)
1233 cmsgbuflen = cmsg_space(sizeof(struct in6_pktinfo));
1235 #if defined(USE_CMSG) && defined(SO_TIMESTAMP)
1236 cmsgbuflen += cmsg_space(sizeof(struct timeval));
1238 sock->recvcmsgbuflen = cmsgbuflen;
1239 if (sock->recvcmsgbuflen != 0U) {
1240 sock->recvcmsgbuf = isc_mem_get(manager->mctx, cmsgbuflen);
1241 if (sock->recvcmsgbuf == NULL)
/* Send side only ever carries pktinfo. */
1246 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO)
1247 cmsgbuflen = cmsg_space(sizeof(struct in6_pktinfo));
1249 sock->sendcmsgbuflen = cmsgbuflen;
1250 if (sock->sendcmsgbuflen != 0U) {
1251 sock->sendcmsgbuf = isc_mem_get(manager->mctx, cmsgbuflen);
1252 if (sock->sendcmsgbuf == NULL)
1257 * set up list of readers and writers to be initially empty
1259 ISC_LIST_INIT(sock->recv_list);
1260 ISC_LIST_INIT(sock->send_list);
1261 ISC_LIST_INIT(sock->accept_list);
1262 sock->connect_ev = NULL;
1263 sock->pending_recv = 0;
1264 sock->pending_send = 0;
1265 sock->pending_accept = 0;
1267 sock->connected = 0;
1268 sock->connecting = 0;
1272 * initialize the lock
1274 if (isc_mutex_init(&sock->lock) != ISC_R_SUCCESS) {
1276 UNEXPECTED_ERROR(__FILE__, __LINE__,
1277 "isc_mutex_init() %s",
1278 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
1279 ISC_MSG_FAILED, "failed"));
1280 ret = ISC_R_UNEXPECTED;
1285 * Initialize readable and writable events
/* Marked NOPURGE so task shutdown cannot discard in-flight internal
 * events; sender and arg are both the socket itself. */
1287 ISC_EVENT_INIT(&sock->readable_ev, sizeof(intev_t),
1288 ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTR,
1289 NULL, sock, sock, NULL, NULL);
1290 ISC_EVENT_INIT(&sock->writable_ev, sizeof(intev_t),
1291 ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTW,
1292 NULL, sock, sock, NULL, NULL);
1294 sock->magic = SOCKET_MAGIC;
1297 return (ISC_R_SUCCESS);
/* Error-unwind path: release whatever was allocated above. */
1300 if (sock->recvcmsgbuf != NULL)
1301 isc_mem_put(manager->mctx, sock->recvcmsgbuf,
1302 sock->recvcmsgbuflen);
1303 if (sock->sendcmsgbuf != NULL)
1304 isc_mem_put(manager->mctx, sock->sendcmsgbuf,
1305 sock->sendcmsgbuflen);
1306 isc_mem_put(manager->mctx, sock, sizeof(*sock));
1312 * This event requires that the various lists be empty, that the reference
1313 * count be 1, and that the magic number is valid. The other socket bits,
1314 * like the lock, must be initialized as well. The fd associated must be
1315 * marked as closed, by setting it to -1 on close, or this routine will
1316 * also close the socket.
/*
 * free_socket(): final teardown -- verify the socket is quiescent
 * (no references, nothing pending or queued, unlinked), release the
 * cmsg buffers and the lock, and return the memory to the manager's
 * memory context.
 */
1319 free_socket(isc_socket_t **socketp) {
1320 isc_socket_t *sock = *socketp;
1322 INSIST(sock->references == 0);
1323 INSIST(VALID_SOCKET(sock));
1324 INSIST(!sock->connecting);
1325 INSIST(!sock->pending_recv);
1326 INSIST(!sock->pending_send);
1327 INSIST(!sock->pending_accept);
1328 INSIST(ISC_LIST_EMPTY(sock->recv_list));
1329 INSIST(ISC_LIST_EMPTY(sock->send_list));
1330 INSIST(ISC_LIST_EMPTY(sock->accept_list));
1331 INSIST(!ISC_LINK_LINKED(sock, link));
1333 if (sock->recvcmsgbuf != NULL)
1334 isc_mem_put(sock->manager->mctx, sock->recvcmsgbuf,
1335 sock->recvcmsgbuflen);
1336 if (sock->sendcmsgbuf != NULL)
1337 isc_mem_put(sock->manager->mctx, sock->sendcmsgbuf,
1338 sock->sendcmsgbuflen);
1342 DESTROYLOCK(&sock->lock);
1344 isc_mem_put(sock->manager->mctx, sock, sizeof(*sock));
1350 * Create a new 'type' socket managed by 'manager'. Events
1351 * will be posted to 'task' and when dispatched 'action' will be
1352 * called with 'arg' as the arg value. The new socket is returned
/*
 * NOTE(review): fragment view — many interior lines (switch header,
 * error-label code, some braces) are missing from this excerpt.
 *
 * Create a new 'type' (UDP or TCP) socket owned by 'manager': allocate
 * the isc_socket_t, create the OS descriptor, move it above fd 20 and
 * below FD_SETSIZE, make it non-blocking, apply per-platform socket
 * options, and register it with the manager.  Returns ISC_R_SUCCESS or
 * an isc_result_t mapped from the OS error.
 */
1356 isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1357 isc_socket_t **socketp)
1359 isc_socket_t *sock = NULL;
1361 #if defined(USE_CMSG) || defined(SO_BSDCOMPAT)
1364 char strbuf[ISC_STRERRORSIZE];
1365 const char *err = "socket";
1367 REQUIRE(VALID_MANAGER(manager));
1368 REQUIRE(socketp != NULL && *socketp == NULL);
/* Allocate the wrapper object first; fd creation follows. */
1370 ret = allocate_socket(manager, type, &sock);
1371 if (ret != ISC_R_SUCCESS)
/* Create the OS descriptor matching the requested socket type. */
1376 case isc_sockettype_udp:
1377 sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP);
1379 case isc_sockettype_tcp:
1380 sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP);
1386 * Leave a space for stdio to work in.
/* Dup the fd above 19 so stdio FILE streams can use the low fds. */
1388 if (sock->fd >= 0 && sock->fd < 20) {
1390 new = fcntl(sock->fd, F_DUPFD, 20);
1392 (void)close(sock->fd);
1395 err = "isc_socket_create: fcntl";
/* select() cannot watch fds >= FD_SETSIZE; refuse them outright. */
1399 if (sock->fd >= (int)FD_SETSIZE) {
1400 (void)close(sock->fd);
1401 isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
1402 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
1403 isc_msgcat, ISC_MSGSET_SOCKET,
1405 "%s: too many open file descriptors", "socket");
1407 return (ISC_R_NORESOURCES);
/* Map socket()/fcntl() errno values to isc_result_t codes. */
1417 return (ISC_R_NORESOURCES);
1419 case EPROTONOSUPPORT:
1423 * Linux 2.2 (and maybe others) return EINVAL instead of
1427 return (ISC_R_FAMILYNOSUPPORT);
1430 isc__strerror(errno, strbuf, sizeof(strbuf));
1431 UNEXPECTED_ERROR(__FILE__, __LINE__,
1433 isc_msgcat_get(isc_msgcat,
1438 return (ISC_R_UNEXPECTED);
/* All I/O in this module is non-blocking; failure here is fatal
 * for this socket. */
1442 if (make_nonblock(sock->fd) != ISC_R_SUCCESS) {
1443 (void)close(sock->fd);
1445 return (ISC_R_UNEXPECTED);
1449 if (setsockopt(sock->fd, SOL_SOCKET, SO_BSDCOMPAT,
1450 (void *)&on, sizeof(on)) < 0) {
1451 isc__strerror(errno, strbuf, sizeof(strbuf));
1452 UNEXPECTED_ERROR(__FILE__, __LINE__,
1453 "setsockopt(%d, SO_BSDCOMPAT) %s: %s",
1455 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
1456 ISC_MSG_FAILED, "failed"),
1462 #if defined(USE_CMSG)
/* UDP-only ancillary data options (timestamps, IPv6 pktinfo). */
1463 if (type == isc_sockettype_udp) {
1465 #if defined(SO_TIMESTAMP)
/* ENOPROTOOPT is tolerated: kernel may simply lack SO_TIMESTAMP. */
1466 if (setsockopt(sock->fd, SOL_SOCKET, SO_TIMESTAMP,
1467 (void *)&on, sizeof(on)) < 0
1468 && errno != ENOPROTOOPT) {
1469 isc__strerror(errno, strbuf, sizeof(strbuf));
1470 UNEXPECTED_ERROR(__FILE__, __LINE__,
1471 "setsockopt(%d, SO_TIMESTAMP) %s: %s",
1473 isc_msgcat_get(isc_msgcat,
1480 #endif /* SO_TIMESTAMP */
1482 #if defined(ISC_PLATFORM_HAVEIPV6)
1483 if (pf == AF_INET6 && sock->recvcmsgbuflen == 0U) {
1485 * Warn explicitly because this anomaly can be hidden
1486 * in usual operation (and unexpectedly appear later).
1488 UNEXPECTED_ERROR(__FILE__, __LINE__,
1489 "No buffer available to receive "
1490 "IPv6 destination");
1492 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
1493 #ifdef IPV6_RECVPKTINFO
/* RFC 3542 (newer API) spelling of the pktinfo option. */
1495 if ((pf == AF_INET6)
1496 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1497 (void *)&on, sizeof(on)) < 0)) {
1498 isc__strerror(errno, strbuf, sizeof(strbuf));
1499 UNEXPECTED_ERROR(__FILE__, __LINE__,
1500 "setsockopt(%d, IPV6_RECVPKTINFO) "
1502 isc_msgcat_get(isc_msgcat,
/* RFC 2292 (older API) spelling, used when RECVPKTINFO is absent. */
1510 if ((pf == AF_INET6)
1511 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO,
1512 (void *)&on, sizeof(on)) < 0)) {
1513 isc__strerror(errno, strbuf, sizeof(strbuf));
1514 UNEXPECTED_ERROR(__FILE__, __LINE__,
1515 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1517 isc_msgcat_get(isc_msgcat,
1523 #endif /* IPV6_RECVPKTINFO */
1524 #endif /* ISC_PLATFORM_HAVEIN6PKTINFO */
1525 #ifdef IPV6_USE_MIN_MTU /*2292bis, not too common yet*/
1526 /* use minimum MTU */
1527 if (pf == AF_INET6) {
/* Best effort only: result intentionally ignored. */
1528 (void)setsockopt(sock->fd, IPPROTO_IPV6,
1530 (void *)&on, sizeof(on));
1533 #endif /* ISC_PLATFORM_HAVEIPV6 */
1536 #endif /* USE_CMSG */
1538 sock->references = 1;
1541 LOCK(&manager->lock);
1544 * Note we don't have to lock the socket like we normally would because
1545 * there are no external references to it yet.
/* Register with the manager: fd lookup table, state, list, maxfd. */
1548 manager->fds[sock->fd] = sock;
1549 manager->fdstate[sock->fd] = MANAGED;
1550 ISC_LIST_APPEND(manager->socklist, sock, link);
1551 if (manager->maxfd < sock->fd)
1552 manager->maxfd = sock->fd;
1554 UNLOCK(&manager->lock);
1556 socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1557 ISC_MSG_CREATED, "created");
1559 return (ISC_R_SUCCESS);
1563 * Attach to a socket. Caller must explicitly detach when it is done.
/*
 * NOTE(review): fragment view — the lines that take the lock and bump
 * the reference count are missing from this excerpt.
 *
 * Attach another reference to 'sock', storing it in '*socketp'.
 * Caller must balance with isc_socket_detach().
 */
1566 isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp) {
1567 REQUIRE(VALID_SOCKET(sock));
1568 REQUIRE(socketp != NULL && *socketp == NULL);
1572 UNLOCK(&sock->lock);
1578 * Dereference a socket. If this is the last reference to it, clean things
1579 * up by destroying the socket.
/*
 * NOTE(review): fragment view — the lock acquisition, the reference
 * decrement, and the code that acts on kill_socket are missing from
 * this excerpt.
 *
 * Drop one reference to the socket in '*socketp'; when the count hits
 * zero the socket is flagged (via kill_socket) for destruction.
 */
1582 isc_socket_detach(isc_socket_t **socketp) {
1584 isc_boolean_t kill_socket = ISC_FALSE;
1586 REQUIRE(socketp != NULL);
1588 REQUIRE(VALID_SOCKET(sock));
1591 REQUIRE(sock->references > 0);
/* Last reference gone: destroy outside the lock (see kill_socket use,
 * not visible in this excerpt). */
1593 if (sock->references == 0)
1594 kill_socket = ISC_TRUE;
1595 UNLOCK(&sock->lock);
1604 * I/O is possible on a given socket. Schedule an event to this task that
1605 * will call an internal function to do the I/O. This will charge the
1606 * task with the I/O operation and let our select loop handler get back
1607 * to doing something real as fast as possible.
1609 * The socket and manager must be locked before calling this function.
/*
 * NOTE(review): fragment view — some lines are missing from this excerpt.
 *
 * Post the socket's internal readable event to the task that owns the
 * head of the recv queue, so the I/O happens in task context rather
 * than in the select loop.  Socket and manager must be locked (see the
 * comment block above this function).
 */
1612 dispatch_recv(isc_socket_t *sock) {
1614 isc_socketevent_t *ev;
1616 INSIST(!sock->pending_recv);
1618 ev = ISC_LIST_HEAD(sock->recv_list);
/* Mark a recv dispatch as in flight so it is not dispatched twice. */
1622 sock->pending_recv = 1;
1623 iev = &sock->readable_ev;
1625 socket_log(sock, NULL, EVENT, NULL, 0, 0,
1626 "dispatch_recv: event %p -> task %p", ev, ev->ev_sender);
1629 iev->ev_sender = sock;
1630 iev->ev_action = internal_recv;
/* Send the internal event to the task owning the queued recv. */
1633 isc_task_send(ev->ev_sender, (isc_event_t **)&iev);
/*
 * NOTE(review): fragment view — some lines are missing from this excerpt.
 *
 * Mirror of dispatch_recv() for the write side: post the internal
 * writable event to the task owning the head of the send queue.
 */
1637 dispatch_send(isc_socket_t *sock) {
1639 isc_socketevent_t *ev;
1641 INSIST(!sock->pending_send);
1643 ev = ISC_LIST_HEAD(sock->send_list);
/* Mark a send dispatch as in flight so it is not dispatched twice. */
1647 sock->pending_send = 1;
1648 iev = &sock->writable_ev;
1650 socket_log(sock, NULL, EVENT, NULL, 0, 0,
1651 "dispatch_send: event %p -> task %p", ev, ev->ev_sender);
1654 iev->ev_sender = sock;
1655 iev->ev_action = internal_send;
1658 isc_task_send(ev->ev_sender, (isc_event_t **)&iev);
1662 * Dispatch an internal accept event.
/*
 * NOTE(review): fragment view — some lines are missing from this excerpt.
 *
 * Post the internal accept event to the task owning the head of the
 * accept queue.  Unlike recv/send dispatch, this takes an extra socket
 * reference so the socket survives until internal_accept() runs.
 */
1665 dispatch_accept(isc_socket_t *sock) {
1667 isc_socket_newconnev_t *ev;
1669 INSIST(!sock->pending_accept);
1672 * Are there any done events left, or were they all canceled
1673 * before the manager got the socket lock?
1675 ev = ISC_LIST_HEAD(sock->accept_list);
1679 sock->pending_accept = 1;
1680 iev = &sock->readable_ev;
1682 sock->references++; /* keep socket around for this internal event */
1683 iev->ev_sender = sock;
1684 iev->ev_action = internal_accept;
1687 isc_task_send(ev->ev_sender, (isc_event_t **)&iev);
/*
 * NOTE(review): fragment view — some lines are missing from this excerpt.
 *
 * Post the internal connect-completion event to the task that started
 * the connect.  Exactly one connect event (sock->connect_ev) may be
 * outstanding; an extra socket reference keeps the socket alive until
 * internal_connect() runs.
 */
1691 dispatch_connect(isc_socket_t *sock) {
1693 isc_socket_connev_t *ev;
1695 iev = &sock->writable_ev;
1697 ev = sock->connect_ev;
1698 INSIST(ev != NULL); /* XXX */
1700 INSIST(sock->connecting);
1702 sock->references++; /* keep socket around for this internal event */
1703 iev->ev_sender = sock;
1704 iev->ev_action = internal_connect;
1707 isc_task_send(ev->ev_sender, (isc_event_t **)&iev);
1711 * Dequeue an item off the given socket's read queue, set the result code
1712 * in the done event to the one provided, and send it to the task it was
1715 * If the event to be sent is on a list, remove it before sending. If
1716 * asked to, send and detach from the socket as well.
1718 * Caller must have the socket locked if the event is attached to the socket.
/*
 * NOTE(review): fragment view — some lines are missing from this excerpt.
 *
 * Deliver a completed recv event '*dev' to its task: unlink it from the
 * recv queue if still queued, then send it — detaching the task as well
 * when the event was attached (ISC_SOCKEVENTATTR_ATTACHED).  '*dev' is
 * consumed (set to NULL by the task-send calls).
 */
1721 send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1724 task = (*dev)->ev_sender;
/* Repoint ev_sender at the socket; 'task' keeps the destination. */
1726 (*dev)->ev_sender = sock;
1728 if (ISC_LINK_LINKED(*dev, ev_link))
1729 ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
1731 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1732 == ISC_SOCKEVENTATTR_ATTACHED)
1733 isc_task_sendanddetach(&task, (isc_event_t **)dev);
1735 isc_task_send(task, (isc_event_t **)dev);
1739 * See comments for send_recvdone_event() above.
1741 * Caller must have the socket locked if the event is attached to the socket.
/*
 * NOTE(review): fragment view — some lines are missing from this excerpt.
 *
 * Write-side twin of send_recvdone_event(): deliver a completed send
 * event to its task, dequeueing from send_list and detaching the task
 * when the event carries ISC_SOCKEVENTATTR_ATTACHED.
 */
1744 send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1747 INSIST(dev != NULL && *dev != NULL);
1749 task = (*dev)->ev_sender;
1750 (*dev)->ev_sender = sock;
1752 if (ISC_LINK_LINKED(*dev, ev_link))
1753 ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
1755 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1756 == ISC_SOCKEVENTATTR_ATTACHED)
1757 isc_task_sendanddetach(&task, (isc_event_t **)dev);
1759 isc_task_send(task, (isc_event_t **)dev);
1763 * Call accept() on a socket, to get the new file descriptor. The listen
1764 * socket is used as a prototype to create a new isc_socket_t. The new
1765 * socket has one outstanding reference. The task receiving the event
1766 * will be detached from just after the event is delivered.
1768 * On entry to this function, the event delivered is the internal
1769 * readable event, and the first item on the accept_list should be
1770 * the done event we want to send. If the list is empty, this is a no-op,
1771 * so just unlock and return.
/*
 * NOTE(review): fragment view — many interior lines (errno switch body,
 * several braces and labels) are missing from this excerpt.
 *
 * Task-context handler for the internal readable event on a listening
 * socket: accept() the new connection, adopt the fd into a pre-built
 * isc_socket_t (dev->newsocket), register it with the manager, and send
 * the done event back to the requesting task.
 */
1774 internal_accept(isc_task_t *me, isc_event_t *ev) {
1776 isc_socketmgr_t *manager;
1777 isc_socket_newconnev_t *dev;
1779 ISC_SOCKADDR_LEN_T addrlen;
1781 isc_result_t result = ISC_R_SUCCESS;
1782 char strbuf[ISC_STRERRORSIZE];
1783 const char *err = "accept";
1787 sock = ev->ev_sender;
1788 INSIST(VALID_SOCKET(sock));
1791 socket_log(sock, NULL, TRACE,
1792 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1793 "internal_accept called, locked socket");
1795 manager = sock->manager;
1796 INSIST(VALID_MANAGER(manager));
1798 INSIST(sock->listener);
1799 INSIST(sock->pending_accept == 1);
1800 sock->pending_accept = 0;
/* Drop the reference taken in dispatch_accept(); if it was the last
 * one the socket is being torn down, so bail out. */
1802 INSIST(sock->references > 0);
1803 sock->references--; /* the internal event is done with this socket */
1804 if (sock->references == 0) {
1805 UNLOCK(&sock->lock);
1811 * Get the first item off the accept list.
1812 * If it is empty, unlock the socket and return.
1814 dev = ISC_LIST_HEAD(sock->accept_list);
1816 UNLOCK(&sock->lock);
1821 * Try to accept the new connection. If the accept fails with
1822 * EAGAIN or EINTR, simply poke the watcher to watch this socket
1823 * again. Also ignore ECONNRESET, which has been reported to
1824 * be spuriously returned on Linux 2.2.19 although it is not
1825 * a documented error for accept(). ECONNABORTED has been
1826 * reported for Solaris 8. The rest are thrown in not because
1827 * we have seen them but because they are ignored by other
1828 * daemons such as BIND 8 and Apache.
1831 addrlen = sizeof(dev->newsocket->address.type);
1832 memset(&dev->newsocket->address.type.sa, 0, addrlen);
1833 fd = accept(sock->fd, &dev->newsocket->address.type.sa,
1838 * Leave a space for stdio to work in.
/* Same low-fd avoidance as isc_socket_create(): dup above fd 19. */
1840 if (fd >= 0 && fd < 20) {
1842 new = fcntl(fd, F_DUPFD, 20);
1852 if (SOFT_ERROR(errno))
1875 isc__strerror(errno, strbuf, sizeof(strbuf));
1876 UNEXPECTED_ERROR(__FILE__, __LINE__,
1877 "internal_accept: %s() %s: %s", err,
1878 isc_msgcat_get(isc_msgcat,
1884 result = ISC_R_UNEXPECTED;
/* Sanity-check what accept() reported for the peer address. */
1886 if (addrlen == 0U) {
1887 UNEXPECTED_ERROR(__FILE__, __LINE__,
1888 "internal_accept(): "
1889 "accept() failed to return "
1894 } else if (dev->newsocket->address.type.sa.sa_family !=
1897 UNEXPECTED_ERROR(__FILE__, __LINE__,
1898 "internal_accept(): "
1899 "accept() returned peer address "
1900 "family %u (expected %u)",
1901 dev->newsocket->address.
1906 } else if (fd >= (int)FD_SETSIZE) {
1907 isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
1908 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
1909 isc_msgcat, ISC_MSGSET_SOCKET,
1911 "%s: too many open file descriptors",
1919 dev->newsocket->address.length = addrlen;
1920 dev->newsocket->pf = sock->pf;
1924 * Pull off the done event.
1926 ISC_LIST_UNLINK(sock->accept_list, dev, ev_link);
1929 * Poke watcher if there are more pending accepts.
1931 if (!ISC_LIST_EMPTY(sock->accept_list))
1932 select_poke(sock->manager, sock->fd, SELECT_POKE_ACCEPT);
1934 UNLOCK(&sock->lock);
1936 if (fd != -1 && (make_nonblock(fd) != ISC_R_SUCCESS)) {
1939 result = ISC_R_UNEXPECTED;
1943 * -1 means the new socket didn't happen.
/* Success path: adopt the fd into the pre-allocated newsocket and
 * register it with the manager (under the manager lock). */
1946 LOCK(&manager->lock);
1947 ISC_LIST_APPEND(manager->socklist, dev->newsocket, link);
1949 dev->newsocket->fd = fd;
1950 dev->newsocket->bound = 1;
1951 dev->newsocket->connected = 1;
1954 * Save away the remote address
1956 dev->address = dev->newsocket->address;
1958 manager->fds[fd] = dev->newsocket;
1959 manager->fdstate[fd] = MANAGED;
1960 if (manager->maxfd < fd)
1961 manager->maxfd = fd;
1963 socket_log(sock, &dev->newsocket->address, CREATION,
1964 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
1965 "accepted connection, new socket %p",
1968 UNLOCK(&manager->lock);
/* Failure path (per surrounding context): release the unused
 * newsocket. */
1970 dev->newsocket->references--;
1971 free_socket(&dev->newsocket);
1975 * Fill in the done event details and send it off.
1977 dev->result = result;
1978 task = dev->ev_sender;
1979 dev->ev_sender = sock;
1981 isc_task_sendanddetach(&task, ISC_EVENT_PTR(&dev));
/* Soft-error path: re-arm the watcher and retry later. */
1985 select_poke(sock->manager, sock->fd, SELECT_POKE_ACCEPT);
1986 UNLOCK(&sock->lock);
/*
 * NOTE(review): fragment view — some braces and case labels are missing
 * from this excerpt.
 *
 * Task-context handler for the internal readable event: drain as many
 * queued recv requests as doio_recv() will satisfy, delivering done
 * events as requests complete, then re-arm the watcher if requests
 * remain queued.
 */
1991 internal_recv(isc_task_t *me, isc_event_t *ev) {
1992 isc_socketevent_t *dev;
1995 INSIST(ev->ev_type == ISC_SOCKEVENT_INTR);
1997 sock = ev->ev_sender;
1998 INSIST(VALID_SOCKET(sock));
2001 socket_log(sock, NULL, IOEVENT,
2002 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
2003 "internal_recv: task %p got event %p", me, ev);
2005 INSIST(sock->pending_recv == 1);
2006 sock->pending_recv = 0;
/* Drop the dispatch reference; if it was the last, stop here. */
2008 INSIST(sock->references > 0);
2009 sock->references--; /* the internal event is done with this socket */
2010 if (sock->references == 0) {
2011 UNLOCK(&sock->lock);
2017 * Try to do as much I/O as possible on this socket. There are no
2018 * limits here, currently.
2020 dev = ISC_LIST_HEAD(sock->recv_list);
2021 while (dev != NULL) {
2022 switch (doio_recv(sock, dev)) {
2028 * read of 0 means the remote end was closed.
2029 * Run through the event queue and dispatch all
2030 * the events with an EOF result code.
/* EOF: flush every queued recv with ISC_R_EOF. */
2033 dev->result = ISC_R_EOF;
2034 send_recvdone_event(sock, &dev);
2035 dev = ISC_LIST_HEAD(sock->recv_list);
2036 } while (dev != NULL);
2041 send_recvdone_event(sock, &dev);
2045 dev = ISC_LIST_HEAD(sock->recv_list);
/* Still work queued: ask the watcher to keep selecting for read. */
2049 if (!ISC_LIST_EMPTY(sock->recv_list))
2050 select_poke(sock->manager, sock->fd, SELECT_POKE_READ);
2052 UNLOCK(&sock->lock);
/*
 * NOTE(review): fragment view — some braces and case labels are missing
 * from this excerpt.
 *
 * Task-context handler for the internal writable event: drain queued
 * send requests via doio_send(), deliver done events, and re-arm the
 * watcher if any sends remain queued.
 */
2056 internal_send(isc_task_t *me, isc_event_t *ev) {
2057 isc_socketevent_t *dev;
2060 INSIST(ev->ev_type == ISC_SOCKEVENT_INTW);
2063 * Find out what socket this is and lock it.
2065 sock = (isc_socket_t *)ev->ev_sender;
2066 INSIST(VALID_SOCKET(sock));
2069 socket_log(sock, NULL, IOEVENT,
2070 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
2071 "internal_send: task %p got event %p", me, ev);
2073 INSIST(sock->pending_send == 1);
2074 sock->pending_send = 0;
/* Drop the dispatch reference; if it was the last, stop here. */
2076 INSIST(sock->references > 0);
2077 sock->references--; /* the internal event is done with this socket */
2078 if (sock->references == 0) {
2079 UNLOCK(&sock->lock);
2085 * Try to do as much I/O as possible on this socket. There are no
2086 * limits here, currently.
2088 dev = ISC_LIST_HEAD(sock->send_list);
2089 while (dev != NULL) {
2090 switch (doio_send(sock, dev)) {
2096 send_senddone_event(sock, &dev);
2100 dev = ISC_LIST_HEAD(sock->send_list);
/* Still work queued: ask the watcher to keep selecting for write. */
2104 if (!ISC_LIST_EMPTY(sock->send_list))
2105 select_poke(sock->manager, sock->fd, SELECT_POKE_WRITE);
2107 UNLOCK(&sock->lock);
/*
 * NOTE(review): fragment view — some braces/continue statements are
 * missing from this excerpt.
 *
 * Walk fds [0, maxfd) after select() returns: retire CLOSE_PENDING
 * descriptors, and for each readable/writable fd clear it from the
 * manager's interest sets and dispatch the matching internal event
 * (accept vs recv on read; connect vs send on write).
 */
2111 process_fds(isc_socketmgr_t *manager, int maxfd,
2112 fd_set *readfds, fd_set *writefds)
2116 isc_boolean_t unlock_sock;
2118 REQUIRE(maxfd <= (int)FD_SETSIZE);
2121 * Process read/writes on other fds here. Avoid locking
2122 * and unlocking twice if both reads and writes are possible.
2124 for (i = 0; i < maxfd; i++) {
2125 #ifdef ISC_PLATFORM_USETHREADS
/* The internal wakeup pipe is handled by the watcher, not here. */
2126 if (i == manager->pipe_fds[0] || i == manager->pipe_fds[1])
2128 #endif /* ISC_PLATFORM_USETHREADS */
2130 if (manager->fdstate[i] == CLOSE_PENDING) {
2131 manager->fdstate[i] = CLOSED;
2132 FD_CLR(i, &manager->read_fds);
2133 FD_CLR(i, &manager->write_fds);
2140 sock = manager->fds[i];
2141 unlock_sock = ISC_FALSE;
2142 if (FD_ISSET(i, readfds)) {
2144 FD_CLR(i, &manager->read_fds);
2147 unlock_sock = ISC_TRUE;
/* Listener sockets get accepts; everything else gets recvs. */
2149 if (!SOCK_DEAD(sock)) {
2151 dispatch_accept(sock);
2153 dispatch_recv(sock);
2155 FD_CLR(i, &manager->read_fds);
2158 if (FD_ISSET(i, writefds)) {
2160 FD_CLR(i, &manager->write_fds);
2164 unlock_sock = ISC_TRUE;
/* Connecting sockets get connect completion; others get sends. */
2167 if (!SOCK_DEAD(sock)) {
2168 if (sock->connecting)
2169 dispatch_connect(sock);
2171 dispatch_send(sock);
2173 FD_CLR(i, &manager->write_fds);
2176 UNLOCK(&sock->lock);
2180 #ifdef ISC_PLATFORM_USETHREADS
2182 * This is the thread that will loop forever, always in a select or poll
2185 * When select returns something to do, track down what thread gets to do
2186 * this I/O and post the event to it.
/*
 * NOTE(review): fragment view — the outer loop construct and several
 * braces are missing from this excerpt.
 *
 * Dedicated watcher thread (threaded builds only): loop on select()
 * over the manager's interest sets, service wakeup messages arriving on
 * the internal control pipe (including shutdown), and hand ready fds to
 * process_fds().  Returns 0 on shutdown.
 */
2188 static isc_threadresult_t
2189 watcher(void *uap) {
2190 isc_socketmgr_t *manager = uap;
2198 char strbuf[ISC_STRERRORSIZE];
2201 * Get the control fd here. This will never change.
2203 LOCK(&manager->lock);
2204 ctlfd = manager->pipe_fds[0];
/* Snapshot the interest sets under the lock; select() runs unlocked. */
2209 readfds = manager->read_fds;
2210 writefds = manager->write_fds;
2211 maxfd = manager->maxfd + 1;
2213 UNLOCK(&manager->lock);
2215 cc = select(maxfd, &readfds, &writefds, NULL, NULL);
/* Only soft errors (EINTR etc.) are tolerated; anything else is fatal. */
2217 if (!SOFT_ERROR(errno)) {
2218 isc__strerror(errno, strbuf,
2220 FATAL_ERROR(__FILE__, __LINE__,
2222 isc_msgcat_get(isc_msgcat,
2230 LOCK(&manager->lock);
2235 * Process reads on internal, control fd.
2237 if (FD_ISSET(ctlfd, &readfds)) {
2239 select_readmsg(manager, &fd, &msg);
2241 manager_log(manager, IOEVENT,
2242 isc_msgcat_get(isc_msgcat,
2245 "watcher got message %d"),
2251 if (msg == SELECT_POKE_NOTHING)
2255 * Handle shutdown message. We really should
2256 * jump out of this loop right away, but
2257 * it doesn't matter if we have to do a little
2260 if (msg == SELECT_POKE_SHUTDOWN) {
2267 * This is a wakeup on a socket. Look
2268 * at the event queue for both read and write,
2269 * and decide if we need to watch on it now
2272 wakeup_socket(manager, fd, msg);
2276 process_fds(manager, maxfd, &readfds, &writefds);
2279 manager_log(manager, TRACE,
2280 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2281 ISC_MSG_EXITING, "watcher exiting"));
2283 UNLOCK(&manager->lock);
2284 return ((isc_threadresult_t)0);
2286 #endif /* ISC_PLATFORM_USETHREADS */
2289 * Create a new socket manager.
/*
 * NOTE(review): fragment view — some braces and the non-threaded
 * refcount lines are missing from this excerpt.
 *
 * Create (or, in non-threaded builds, reuse the singleton) socket
 * manager: allocate the structure, initialize lock/condition, create
 * the wakeup pipe, initialize the select() state, and start the watcher
 * thread.  Each failure path unwinds everything allocated before it.
 */
2292 isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
2293 isc_socketmgr_t *manager;
2294 #ifdef ISC_PLATFORM_USETHREADS
2295 char strbuf[ISC_STRERRORSIZE];
2298 REQUIRE(managerp != NULL && *managerp == NULL);
2300 #ifndef ISC_PLATFORM_USETHREADS
/* Non-threaded builds share a single manager instance. */
2301 if (socketmgr != NULL) {
2303 *managerp = socketmgr;
2304 return (ISC_R_SUCCESS);
2306 #endif /* ISC_PLATFORM_USETHREADS */
2308 manager = isc_mem_get(mctx, sizeof(*manager));
2309 if (manager == NULL)
2310 return (ISC_R_NOMEMORY);
2312 manager->magic = SOCKET_MANAGER_MAGIC;
2313 manager->mctx = NULL;
2314 memset(manager->fds, 0, sizeof(manager->fds));
2315 ISC_LIST_INIT(manager->socklist);
2316 if (isc_mutex_init(&manager->lock) != ISC_R_SUCCESS) {
2317 isc_mem_put(mctx, manager, sizeof(*manager));
2318 UNEXPECTED_ERROR(__FILE__, __LINE__,
2319 "isc_mutex_init() %s",
2320 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2321 ISC_MSG_FAILED, "failed"));
2322 return (ISC_R_UNEXPECTED);
2324 #ifdef ISC_PLATFORM_USETHREADS
/* shutdown_ok is signalled as sockets are destroyed (see destroy). */
2325 if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
2326 DESTROYLOCK(&manager->lock);
2327 isc_mem_put(mctx, manager, sizeof(*manager));
2328 UNEXPECTED_ERROR(__FILE__, __LINE__,
2329 "isc_condition_init() %s",
2330 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2331 ISC_MSG_FAILED, "failed"));
2332 return (ISC_R_UNEXPECTED);
2336 * Create the special fds that will be used to wake up the
2337 * select/poll loop when something internal needs to be done.
2339 if (pipe(manager->pipe_fds) != 0) {
2340 DESTROYLOCK(&manager->lock);
2341 isc_mem_put(mctx, manager, sizeof(*manager));
2342 isc__strerror(errno, strbuf, sizeof(strbuf));
2343 UNEXPECTED_ERROR(__FILE__, __LINE__,
2345 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2346 ISC_MSG_FAILED, "failed"),
2349 return (ISC_R_UNEXPECTED);
/* Both pipe ends must be non-blocking for the watcher protocol. */
2352 RUNTIME_CHECK(make_nonblock(manager->pipe_fds[0]) == ISC_R_SUCCESS);
2354 RUNTIME_CHECK(make_nonblock(manager->pipe_fds[1]) == ISC_R_SUCCESS);
2356 #else /* ISC_PLATFORM_USETHREADS */
2358 #endif /* ISC_PLATFORM_USETHREADS */
2361 * Set up initial state for the select loop
2363 FD_ZERO(&manager->read_fds);
2364 FD_ZERO(&manager->write_fds);
2365 #ifdef ISC_PLATFORM_USETHREADS
/* Always watch the control pipe's read end. */
2366 FD_SET(manager->pipe_fds[0], &manager->read_fds);
2367 manager->maxfd = manager->pipe_fds[0];
2368 #else /* ISC_PLATFORM_USETHREADS */
2370 #endif /* ISC_PLATFORM_USETHREADS */
2371 memset(manager->fdstate, 0, sizeof(manager->fdstate));
2373 #ifdef ISC_PLATFORM_USETHREADS
2375 * Start up the select/poll thread.
2377 if (isc_thread_create(watcher, manager, &manager->watcher) !=
2379 (void)close(manager->pipe_fds[0]);
2380 (void)close(manager->pipe_fds[1]);
2381 DESTROYLOCK(&manager->lock);
2382 isc_mem_put(mctx, manager, sizeof(*manager));
2383 UNEXPECTED_ERROR(__FILE__, __LINE__,
2384 "isc_thread_create() %s",
2385 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2386 ISC_MSG_FAILED, "failed"));
2387 return (ISC_R_UNEXPECTED);
2389 #endif /* ISC_PLATFORM_USETHREADS */
/* Attach the memory context only on the success path. */
2390 isc_mem_attach(mctx, &manager->mctx);
2392 #ifndef ISC_PLATFORM_USETHREADS
2393 socketmgr = manager;
2394 #endif /* ISC_PLATFORM_USETHREADS */
2395 *managerp = manager;
2397 return (ISC_R_SUCCESS);
/*
 * NOTE(review): fragment view — some declarations, braces, and the
 * non-threaded refcount handling are missing from this excerpt.
 *
 * Destroy a socket manager: wait (threaded) or hope (non-threaded) for
 * all sockets to be gone, shut down the watcher thread via the control
 * pipe, close the pipe, retire any CLOSE_PENDING fds, and free the
 * manager, detaching its memory context last.
 */
2401 isc_socketmgr_destroy(isc_socketmgr_t **managerp) {
2402 isc_socketmgr_t *manager;
2407 * Destroy a socket manager.
2410 REQUIRE(managerp != NULL);
2411 manager = *managerp;
2412 REQUIRE(VALID_MANAGER(manager));
2414 #ifndef ISC_PLATFORM_USETHREADS
/* Singleton manager: only the final reference actually destroys. */
2415 if (manager->refs > 1) {
2420 #endif /* ISC_PLATFORM_USETHREADS */
2422 LOCK(&manager->lock);
2424 #ifdef ISC_PLATFORM_USETHREADS
2426 * Wait for all sockets to be destroyed.
2428 while (!ISC_LIST_EMPTY(manager->socklist)) {
2429 manager_log(manager, CREATION,
2430 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2431 ISC_MSG_SOCKETSREMAIN,
/* Sleep until a socket-destroy signals shutdown_ok. */
2433 WAIT(&manager->shutdown_ok, &manager->lock);
2435 #else /* ISC_PLATFORM_USETHREADS */
2437 * Hope all sockets have been destroyed.
2439 if (!ISC_LIST_EMPTY(manager->socklist)) {
2440 manager_log(manager, CREATION,
2441 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2442 ISC_MSG_SOCKETSREMAIN,
2446 #endif /* ISC_PLATFORM_USETHREADS */
2448 UNLOCK(&manager->lock);
2451 * Here, poke our select/poll thread. Do this by closing the write
2452 * half of the pipe, which will send EOF to the read half.
2453 * This is currently a no-op in the non-threaded case.
2455 select_poke(manager, 0, SELECT_POKE_SHUTDOWN);
2457 #ifdef ISC_PLATFORM_USETHREADS
2459 * Wait for thread to exit.
2461 if (isc_thread_join(manager->watcher, NULL) != ISC_R_SUCCESS)
2462 UNEXPECTED_ERROR(__FILE__, __LINE__,
2463 "isc_thread_join() %s",
2464 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2465 ISC_MSG_FAILED, "failed"));
2466 #endif /* ISC_PLATFORM_USETHREADS */
2471 #ifdef ISC_PLATFORM_USETHREADS
2472 (void)close(manager->pipe_fds[0]);
2473 (void)close(manager->pipe_fds[1]);
2474 (void)isc_condition_destroy(&manager->shutdown_ok);
2475 #endif /* ISC_PLATFORM_USETHREADS */
/* Close any fds whose close was deferred by the watcher. */
2477 for (i = 0; i < (int)FD_SETSIZE; i++)
2478 if (manager->fdstate[i] == CLOSE_PENDING)
2481 DESTROYLOCK(&manager->lock);
/* Free the manager before detaching mctx, which it was allocated from. */
2483 mctx= manager->mctx;
2484 isc_mem_put(mctx, manager, sizeof(*manager));
2486 isc_mem_detach(&mctx);
/*
 * NOTE(review): fragment view — the switch header, some braces, and
 * parts of the DOIO result handling are missing from this excerpt.
 *
 * Common back end for all the public recv entry points: try an
 * immediate doio_recv() (always for UDP; for others only when the recv
 * queue is empty), and on a soft result queue the event and poke the
 * watcher.  Honors ISC_SOCKFLAG_IMMEDIATE for the completion-delivery
 * convention.
 */
2492 socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2496 isc_boolean_t have_lock = ISC_FALSE;
2497 isc_task_t *ntask = NULL;
2498 isc_result_t result = ISC_R_SUCCESS;
2500 dev->ev_sender = task;
/* UDP reads never block usefully; try the I/O right away. */
2502 if (sock->type == isc_sockettype_udp) {
2503 io_state = doio_recv(sock, dev);
2506 have_lock = ISC_TRUE;
/* Stream sockets must preserve ordering: only read immediately
 * when nothing is already queued. */
2508 if (ISC_LIST_EMPTY(sock->recv_list))
2509 io_state = doio_recv(sock, dev);
2511 io_state = DOIO_SOFT;
2517 * We couldn't read all or part of the request right now, so
2520 * Attach to socket and to task
2522 isc_task_attach(task, &ntask);
2523 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2527 have_lock = ISC_TRUE;
2531 * Enqueue the request. If the socket was previously not being
2532 * watched, poke the watcher to start paying attention to it.
2534 if (ISC_LIST_EMPTY(sock->recv_list))
2535 select_poke(sock->manager, sock->fd, SELECT_POKE_READ);
2536 ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
2538 socket_log(sock, NULL, EVENT, NULL, 0, 0,
2539 "socket_recv: event %p -> task %p",
/* IMMEDIATE callers poll the result; report in-progress instead of
 * delivering a done event. */
2542 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2543 result = ISC_R_INPROGRESS;
2547 dev->result = ISC_R_EOF;
2552 if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
2553 send_recvdone_event(sock, &dev);
2558 UNLOCK(&sock->lock);
/*
 * NOTE(review): fragment view — some braces and the dev NULL-check are
 * missing from this excerpt.
 *
 * Scatter-read entry point: take ownership of the caller's buffer list,
 * build a RECVDONE event around it, and hand off to socket_recv().
 * 'minimum' of 0 means "fill all buffers" (UDP is always partial-read).
 */
2564 isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2565 unsigned int minimum, isc_task_t *task,
2566 isc_taskaction_t action, const void *arg)
2568 isc_socketevent_t *dev;
2569 isc_socketmgr_t *manager;
2570 unsigned int iocount;
2571 isc_buffer_t *buffer;
2573 REQUIRE(VALID_SOCKET(sock));
2574 REQUIRE(buflist != NULL);
2575 REQUIRE(!ISC_LIST_EMPTY(*buflist));
2576 REQUIRE(task != NULL);
2577 REQUIRE(action != NULL);
2579 manager = sock->manager;
2580 REQUIRE(VALID_MANAGER(manager));
2582 iocount = isc_bufferlist_availablecount(buflist);
2583 REQUIRE(iocount > 0);
2585 INSIST(sock->bound);
2587 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2589 return (ISC_R_NOMEMORY);
2593 * UDP sockets are always partial read
2595 if (sock->type == isc_sockettype_udp)
2599 dev->minimum = iocount;
2601 dev->minimum = minimum;
2605 * Move each buffer from the passed in list to our internal one.
2607 buffer = ISC_LIST_HEAD(*buflist);
2608 while (buffer != NULL) {
2609 ISC_LIST_DEQUEUE(*buflist, buffer, link);
2610 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2611 buffer = ISC_LIST_HEAD(*buflist);
2614 return (socket_recv(sock, dev, task, 0));
/*
 * NOTE(review): fragment view — the dev NULL-check line is missing from
 * this excerpt.
 *
 * Region-based recv convenience wrapper: allocate a RECVDONE event and
 * delegate to isc_socket_recv2() with no flags.
 */
2618 isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum,
2619 isc_task_t *task, isc_taskaction_t action, const void *arg)
2621 isc_socketevent_t *dev;
2622 isc_socketmgr_t *manager;
2624 REQUIRE(VALID_SOCKET(sock));
2625 REQUIRE(action != NULL);
2627 manager = sock->manager;
2628 REQUIRE(VALID_MANAGER(manager));
2630 INSIST(sock->bound);
2632 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2634 return (ISC_R_NOMEMORY);
2636 return (isc_socket_recv2(sock, region, minimum, task, dev, 0));
/*
 * NOTE(review): fragment view — some lines are missing from this excerpt.
 *
 * Recv with a caller-supplied event: initialize the event for a
 * region-based read (UDP is always partial-read; 'minimum' of 0 means
 * the whole region) and hand off to socket_recv() with 'flags'.
 */
2640 isc_socket_recv2(isc_socket_t *sock, isc_region_t *region,
2641 unsigned int minimum, isc_task_t *task,
2642 isc_socketevent_t *event, unsigned int flags)
2644 event->ev_sender = sock;
2645 event->result = ISC_R_UNEXPECTED;
/* Reset the event: region-based read, no stale buffers/attributes. */
2646 ISC_LIST_INIT(event->bufferlist);
2647 event->region = *region;
2650 event->attributes = 0;
2653 * UDP sockets are always partial read.
2655 if (sock->type == isc_sockettype_udp)
2659 event->minimum = region->length;
2661 event->minimum = minimum;
2664 return (socket_recv(sock, event, task, flags));
/*
 * NOTE(review): fragment view — the switch header and several braces
 * are missing from this excerpt.
 *
 * Common back end for all the public send entry points: record the
 * destination (and optional IPv6 pktinfo) on the event, try an
 * immediate doio_send() (always for UDP; otherwise only when the send
 * queue is empty), and on a soft result queue the event — unless the
 * caller passed ISC_SOCKFLAG_NORETRY.
 */
2668 socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2669 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
2673 isc_boolean_t have_lock = ISC_FALSE;
2674 isc_task_t *ntask = NULL;
2675 isc_result_t result = ISC_R_SUCCESS;
2677 dev->ev_sender = task;
2679 set_dev_address(address, sock, dev);
2680 if (pktinfo != NULL) {
2681 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
2682 dev->pktinfo = *pktinfo;
/* Scoped (site-/link-local) addresses need the interface index;
 * for global addresses let the kernel pick the interface. */
2684 if (!isc_sockaddr_issitelocal(address) &&
2685 !isc_sockaddr_islinklocal(address)) {
2686 socket_log(sock, NULL, TRACE, isc_msgcat,
2687 ISC_MSGSET_SOCKET, ISC_MSG_PKTINFOPROVIDED,
2688 "pktinfo structure provided, ifindex %u "
2689 "(set to 0)", pktinfo->ipi6_ifindex);
2692 * Set the pktinfo index to 0 here, to let the
2693 * kernel decide what interface it should send on.
2695 dev->pktinfo.ipi6_ifindex = 0;
/* UDP writes are attempted immediately; streams keep ordering by
 * only sending when nothing is already queued. */
2699 if (sock->type == isc_sockettype_udp)
2700 io_state = doio_send(sock, dev);
2703 have_lock = ISC_TRUE;
2705 if (ISC_LIST_EMPTY(sock->send_list))
2706 io_state = doio_send(sock, dev);
2708 io_state = DOIO_SOFT;
2714 * We couldn't send all or part of the request right now, so
2715 * queue it unless ISC_SOCKFLAG_NORETRY is set.
2717 if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
2718 isc_task_attach(task, &ntask);
2719 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2723 have_lock = ISC_TRUE;
2727 * Enqueue the request. If the socket was previously
2728 * not being watched, poke the watcher to start
2729 * paying attention to it.
2731 if (ISC_LIST_EMPTY(sock->send_list))
2732 select_poke(sock->manager, sock->fd,
2734 ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
2736 socket_log(sock, NULL, EVENT, NULL, 0, 0,
2737 "socket_send: event %p -> task %p",
2740 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2741 result = ISC_R_INPROGRESS;
/* Non-IMMEDIATE callers always get a done event delivered. */
2747 if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
2748 send_senddone_event(sock, &dev);
2753 UNLOCK(&sock->lock);
/*
 * NOTE(review): fragment view — a trailing argument line is missing
 * from this excerpt.
 *
 * Send to the connected peer: thin wrapper delegating to
 * isc_socket_sendto() with no explicit address or pktinfo.
 */
2759 isc_socket_send(isc_socket_t *sock, isc_region_t *region,
2760 isc_task_t *task, isc_taskaction_t action, const void *arg)
2763 * REQUIRE() checking is performed in isc_socket_sendto().
2765 return (isc_socket_sendto(sock, region, task, action, arg, NULL,
/*
 * NOTE(review): fragment view — the dev NULL-check line is missing from
 * this excerpt.
 *
 * Region-based send to an explicit address: allocate a SENDDONE event,
 * attach the caller's region, and delegate to socket_send() with no
 * flags.
 */
2770 isc_socket_sendto(isc_socket_t *sock, isc_region_t *region,
2771 isc_task_t *task, isc_taskaction_t action, const void *arg,
2772 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
2774 isc_socketevent_t *dev;
2775 isc_socketmgr_t *manager;
2777 REQUIRE(VALID_SOCKET(sock));
2778 REQUIRE(region != NULL);
2779 REQUIRE(task != NULL);
2780 REQUIRE(action != NULL);
2782 manager = sock->manager;
2783 REQUIRE(VALID_MANAGER(manager));
2785 INSIST(sock->bound);
2787 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
2789 return (ISC_R_NOMEMORY);
2792 dev->region = *region;
2794 return (socket_send(sock, dev, task, address, pktinfo, 0));
/*
 * NOTE(review): fragment view — a trailing argument line is missing
 * from this excerpt.
 *
 * Gather-send to the connected peer: thin wrapper delegating to
 * isc_socket_sendtov() with no explicit address or pktinfo.
 */
2798 isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2799 isc_task_t *task, isc_taskaction_t action, const void *arg)
2801 return (isc_socket_sendtov(sock, buflist, task, action, arg, NULL,
/*
 * NOTE(review): fragment view — some braces and the dev NULL-check are
 * missing from this excerpt.
 *
 * Gather-send to an explicit address: take ownership of the caller's
 * buffer list, build a SENDDONE event, and hand off to socket_send()
 * with no flags.
 */
2806 isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
2807 isc_task_t *task, isc_taskaction_t action, const void *arg,
2808 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
2810 isc_socketevent_t *dev;
2811 isc_socketmgr_t *manager;
2812 unsigned int iocount;
2813 isc_buffer_t *buffer;
2815 REQUIRE(VALID_SOCKET(sock));
2816 REQUIRE(buflist != NULL);
2817 REQUIRE(!ISC_LIST_EMPTY(*buflist));
2818 REQUIRE(task != NULL);
2819 REQUIRE(action != NULL);
2821 manager = sock->manager;
2822 REQUIRE(VALID_MANAGER(manager));
/* Sends use the *used* regions, unlike recvv's available count. */
2824 iocount = isc_bufferlist_usedcount(buflist);
2825 REQUIRE(iocount > 0);
2827 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
2829 return (ISC_R_NOMEMORY);
2833 * Move each buffer from the passed in list to our internal one.
2835 buffer = ISC_LIST_HEAD(*buflist);
2836 while (buffer != NULL) {
2837 ISC_LIST_DEQUEUE(*buflist, buffer, link);
2838 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2839 buffer = ISC_LIST_HEAD(*buflist);
2842 return (socket_send(sock, dev, task, address, pktinfo, 0));
/*
 * NOTE(review): fragment view — the task parameter line is missing from
 * this excerpt.
 *
 * Send with a caller-supplied event and flags: NORETRY is only valid on
 * UDP sockets; the event is reset for a region-based send, then handed
 * off to socket_send().
 */
2846 isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region,
2848 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
2849 isc_socketevent_t *event, unsigned int flags)
2851 REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
2852 if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
2853 REQUIRE(sock->type == isc_sockettype_udp);
2854 event->ev_sender = sock;
2855 event->result = ISC_R_UNEXPECTED;
/* Reset the event: region-based send, no stale buffers/attributes. */
2856 ISC_LIST_INIT(event->bufferlist);
2857 event->region = *region;
2860 event->attributes = 0;
2862 return (socket_send(sock, event, task, address, pktinfo, flags));
/*
 * NOTE(review): fragment view — the LOCK call, errno switch header, and
 * some braces are missing from this excerpt.
 *
 * Bind the socket to 'sockaddr'.  Sets SO_REUSEADDR only when a
 * specific port is requested, and maps bind() errno values to
 * isc_result_t codes.
 */
2866 isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr) {
2867 char strbuf[ISC_STRERRORSIZE];
2872 INSIST(!sock->bound);
/* Address family must match the family the fd was created with. */
2874 if (sock->pf != sockaddr->type.sa.sa_family) {
2875 UNLOCK(&sock->lock);
2876 return (ISC_R_FAMILYMISMATCH);
2879 * Only set SO_REUSEADDR when we want a specific port.
/* Failure to set the option is logged but deliberately non-fatal. */
2881 if (isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
2882 setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on,
2884 UNEXPECTED_ERROR(__FILE__, __LINE__,
2885 "setsockopt(%d) %s", sock->fd,
2886 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2887 ISC_MSG_FAILED, "failed"));
/* Map bind() errno values to isc_result_t codes. */
2890 if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
2891 UNLOCK(&sock->lock);
2894 return (ISC_R_NOPERM);
2896 return (ISC_R_ADDRNOTAVAIL);
2898 return (ISC_R_ADDRINUSE);
2900 return (ISC_R_BOUND);
2902 isc__strerror(errno, strbuf, sizeof(strbuf));
2903 UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
2905 return (ISC_R_UNEXPECTED);
2909 socket_log(sock, sockaddr, TRACE,
2910 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
2913 UNLOCK(&sock->lock);
2914 return (ISC_R_SUCCESS);
/*
 * isc_socket_filter():  install a kernel accept filter (BSD
 * SO_ACCEPTFILTER, e.g. "dataready"/"httpready") on a listening
 * socket.  On platforms without SO_ACCEPTFILTER this returns
 * ISC_R_NOTIMPLEMENTED; on failure the error is logged and
 * ISC_R_FAILURE returned.
 *
 * NOTE(review): strncpy() does not guarantee NUL-termination of
 * afa.af_name when "filter" is as long as the field; af_name is
 * presumably zeroed by the preceding bzero(), so only an exactly
 * field-length name is at risk — worth verifying.
 */
2918 isc_socket_filter(isc_socket_t *sock, const char *filter) {
2919 #ifdef SO_ACCEPTFILTER
2920 	char strbuf[ISC_STRERRORSIZE];
2921 	struct accept_filter_arg afa;
2927 	REQUIRE(VALID_SOCKET(sock));
2929 #ifdef SO_ACCEPTFILTER
/* Zero the argument struct, then copy in the filter module name. */
2930 	bzero(&afa, sizeof(afa));
2931 	strncpy(afa.af_name, filter, sizeof(afa.af_name));
2932 	if (setsockopt(sock->fd, SOL_SOCKET, SO_ACCEPTFILTER,
2933 		       &afa, sizeof(afa)) == -1) {
2934 		isc__strerror(errno, strbuf, sizeof(strbuf));
2935 		socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
2936 			   ISC_MSG_FILTER, "setsockopt(SO_ACCEPTFILTER): %s",
2938 		return (ISC_R_FAILURE);
2940 	return (ISC_R_SUCCESS);
/* Non-BSD platforms: feature not available. */
2942 	return (ISC_R_NOTIMPLEMENTED);
2947 * Set up to listen on a given socket. We do this by creating an internal
2948 * event that will be dispatched when the socket has read activity. The
2949 * watcher will send the internal event to the task when there is a new
2952 * Unlike in read, we don't preallocate a done event here. Every time there
2953 * is a new connection we'll have to allocate a new one anyway, so we might
2954 * as well keep things simple rather than having to track them.
/*
 * isc_socket_listen():  mark a bound TCP socket as listening.
 * A backlog of 0 is replaced with SOMAXCONN.  On listen() failure
 * the errno text is logged and ISC_R_UNEXPECTED returned.
 *
 * NOTE(review): the LOCK() call and the "sock->listener = 1" line
 * are among the lines missing from this excerpt.
 */
2957 isc_socket_listen(isc_socket_t *sock, unsigned int backlog) {
2958 	char strbuf[ISC_STRERRORSIZE];
2960 	REQUIRE(VALID_SOCKET(sock));
/* Must not already be listening; must be a bound TCP socket. */
2964 	REQUIRE(!sock->listener);
2965 	REQUIRE(sock->bound);
2966 	REQUIRE(sock->type == isc_sockettype_tcp);
/* Zero backlog means "use the system maximum". */
2969 		backlog = SOMAXCONN;
2971 	if (listen(sock->fd, (int)backlog) < 0) {
2972 		UNLOCK(&sock->lock);
2973 		isc__strerror(errno, strbuf, sizeof(strbuf));
2975 		UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf);
2977 		return (ISC_R_UNEXPECTED);
2982 	UNLOCK(&sock->lock);
2983 	return (ISC_R_SUCCESS);
2987  * This should try to do aggressive accept() XXXMLG
/*
 * isc_socket_accept():  queue an accept request on a listening
 * socket.  Allocates a newconn event and a fresh isc_socket_t for
 * the eventual peer, attaches the requesting task, enqueues the
 * event on sock->accept_list, and pokes the watcher thread so it
 * starts selecting for read on the listener.  "task" receives an
 * ISC_SOCKEVENT_NEWCONN event when a connection arrives.
 */
2990 isc_socket_accept(isc_socket_t *sock,
2991 		  isc_task_t *task, isc_taskaction_t action, const void *arg)
2993 	isc_socket_newconnev_t *dev;
2994 	isc_socketmgr_t *manager;
2995 	isc_task_t *ntask = NULL;
2996 	isc_socket_t *nsock;
2998 	isc_boolean_t do_poke = ISC_FALSE;
3000 	REQUIRE(VALID_SOCKET(sock));
3001 	manager = sock->manager;
3002 	REQUIRE(VALID_MANAGER(manager));
3006 	REQUIRE(sock->listener);
3009 	 * Sender field is overloaded here with the task we will be sending
3010 	 * this event to.  Just before the actual event is delivered the
3011 	 * actual ev_sender will be touched up to be the socket.
3013 	dev = (isc_socket_newconnev_t *)
3014 		isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN,
3015 				   action, arg, sizeof(*dev));
/* Allocation failure: drop the lock and report out-of-memory. */
3017 		UNLOCK(&sock->lock);
3018 		return (ISC_R_NOMEMORY);
3020 	ISC_LINK_INIT(dev, ev_link);
/* Pre-allocate the socket object the accepted fd will live in. */
3022 	ret = allocate_socket(manager, sock->type, &nsock);
3023 	if (ret != ISC_R_SUCCESS) {
3024 		isc_event_free(ISC_EVENT_PTR(&dev));
3025 		UNLOCK(&sock->lock);
3030 	 * Attach to socket and to task.
3032 	isc_task_attach(task, &ntask);
3033 	nsock->references++;
3035 	dev->ev_sender = ntask;
3036 	dev->newsocket = nsock;
3039 	 * Poke watcher here.  We still have the socket locked, so there
3040 	 * is no race condition.  We will keep the lock for such a short
3041 	 * bit of time waking it up now or later won't matter all that much.
/* Only the first queued accept needs to wake the watcher. */
3043 	if (ISC_LIST_EMPTY(sock->accept_list))
3046 	ISC_LIST_ENQUEUE(sock->accept_list, dev, ev_link);
3049 		select_poke(manager, sock->fd, SELECT_POKE_ACCEPT);
3051 	UNLOCK(&sock->lock);
3052 	return (ISC_R_SUCCESS);
/*
 * isc_socket_connect():  initiate a connect to "addr", delivering an
 * ISC_SOCKEVENT_CONNECT event to "task" when it completes.
 *
 * The connect() is attempted immediately: if it completes, the done
 * event is posted at once with ISC_R_SUCCESS; if it fails with a
 * recognized errno the event carries the mapped ISC_R_* code; if it
 * is in progress (EINPROGRESS / soft error) the request is parked in
 * sock->connect_ev and the watcher is poked to select for write.
 * Multicast destinations are rejected with ISC_R_MULTICAST, and only
 * one connect may be outstanding (REQUIRE(!sock->connecting)).
 */
3056 isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
3057 		   isc_task_t *task, isc_taskaction_t action, const void *arg)
3059 	isc_socket_connev_t *dev;
3060 	isc_task_t *ntask = NULL;
3061 	isc_socketmgr_t *manager;
3063 	char strbuf[ISC_STRERRORSIZE];
3065 	REQUIRE(VALID_SOCKET(sock));
3066 	REQUIRE(addr != NULL);
3067 	REQUIRE(task != NULL);
3068 	REQUIRE(action != NULL);
3070 	manager = sock->manager;
3071 	REQUIRE(VALID_MANAGER(manager));
/* NOTE(review): duplicate of the addr REQUIRE above — harmless. */
3072 	REQUIRE(addr != NULL);
3074 	if (isc_sockaddr_ismulticast(addr))
3075 		return (ISC_R_MULTICAST);
3079 	REQUIRE(!sock->connecting);
/* Event is allocated before connect() so failure paths can post it. */
3081 	dev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
3082 							ISC_SOCKEVENT_CONNECT,
3086 		UNLOCK(&sock->lock);
3087 		return (ISC_R_NOMEMORY);
3089 	ISC_LINK_INIT(dev, ev_link);
3092 	 * Try to do the connect right away, as there can be only one
3093 	 * outstanding, and it might happen to complete.
3095 	sock->address = *addr;
3096 	cc = connect(sock->fd, &addr->type.sa, addr->length);
/* In-progress or transient error: fall through to the queued path. */
3098 		if (SOFT_ERROR(errno) || errno == EINPROGRESS)
/* Translate hard connect() failures into ISC_R_* result codes. */
3102 #define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit;
3103 			ERROR_MATCH(EACCES, ISC_R_NOPERM);
3104 			ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
3105 			ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
3106 			ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED);
3107 			ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH);
3109 			ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH);
3111 			ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH);
3112 			ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES);
3113 			ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH);
3114 			ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED);
3115 			ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET);
/* Unrecognized errno: free the event and report ISC_R_UNEXPECTED. */
3119 		sock->connected = 0;
3121 		isc__strerror(errno, strbuf, sizeof(strbuf));
3122 		UNEXPECTED_ERROR(__FILE__, __LINE__, "%d/%s", errno, strbuf);
3124 		UNLOCK(&sock->lock);
3125 		isc_event_free(ISC_EVENT_PTR(&dev));
3126 		return (ISC_R_UNEXPECTED);
/* err_exit path: post the mapped failure result to the task. */
3129 		sock->connected = 0;
3130 		isc_task_send(task, ISC_EVENT_PTR(&dev));
3132 		UNLOCK(&sock->lock);
3133 		return (ISC_R_SUCCESS);
3137 	 * If connect completed, fire off the done event.
3140 		sock->connected = 1;
3142 		dev->result = ISC_R_SUCCESS;
3143 		isc_task_send(task, ISC_EVENT_PTR(&dev));
3145 		UNLOCK(&sock->lock);
3146 		return (ISC_R_SUCCESS);
/* Connect is pending: park the event and wait for writability. */
3154 	isc_task_attach(task, &ntask);
3156 	sock->connecting = 1;
3158 	dev->ev_sender = ntask;
3161 	 * Poke watcher here.  We still have the socket locked, so there
3162 	 * is no race condition.  We will keep the lock for such a short
3163 	 * bit of time waking it up now or later won't matter all that much.
3165 	if (sock->connect_ev == NULL)
3166 		select_poke(manager, sock->fd, SELECT_POKE_CONNECT);
3168 	sock->connect_ev = dev;
3170 	UNLOCK(&sock->lock);
3171 	return (ISC_R_SUCCESS);
3175 * Called when a socket with a pending connect() finishes.
/*
 * internal_connect():  watcher-side completion handler, run when a
 * socket with a pending connect() becomes writable.
 *
 * Drops the reference taken when the internal event was posted,
 * bails out if the connect was canceled (connect_ev == NULL), then
 * reads the final status via getsockopt(SO_ERROR).  Soft errors /
 * EINPROGRESS re-arm the watcher; hard errors are mapped to ISC_R_*
 * codes; success marks the socket connected.  Finally the done event
 * is sent to the requesting task with sendanddetach().
 */
3178 internal_connect(isc_task_t *me, isc_event_t *ev) {
3180 	isc_socket_connev_t *dev;
3183 	ISC_SOCKADDR_LEN_T optlen;
3184 	char strbuf[ISC_STRERRORSIZE];
3185 	char peerbuf[ISC_SOCKADDR_FORMATSIZE];
3188 	INSIST(ev->ev_type == ISC_SOCKEVENT_INTW);
3190 	sock = ev->ev_sender;
3191 	INSIST(VALID_SOCKET(sock));
3196 	 * When the internal event was sent the reference count was bumped
3197 	 * to keep the socket around for us.  Decrement the count here.
3199 	INSIST(sock->references > 0);
/* Last reference gone: presumably destroy_socket() follows here. */
3201 	if (sock->references == 0) {
3202 		UNLOCK(&sock->lock);
3208 	 * Has this event been canceled?
3210 	dev = sock->connect_ev;
/* Canceled: there must be no connect in progress; just return. */
3212 		INSIST(!sock->connecting);
3213 		UNLOCK(&sock->lock);
3217 	INSIST(sock->connecting);
3218 	sock->connecting = 0;
3221 	 * Get any possible error status here.
3223 	optlen = sizeof(cc);
3224 	if (getsockopt(sock->fd, SOL_SOCKET, SO_ERROR,
3225 		       (void *)&cc, (void *)&optlen) < 0)
/* NOTE(review): the errno = cc assignment is among the missing lines. */
3232 		 * If the error is EAGAIN, just re-select on this
3233 		 * fd and pretend nothing strange happened.
3235 		if (SOFT_ERROR(errno) || errno == EINPROGRESS) {
3236 			sock->connecting = 1;
3237 			select_poke(sock->manager, sock->fd,
3238 				    SELECT_POKE_CONNECT);
3239 			UNLOCK(&sock->lock);
3245 		 * Translate other errors into ISC_R_* flavors.
/* Same mapping as isc_socket_connect(), but break instead of goto. */
3248 #define ERROR_MATCH(a, b) case a: dev->result = b; break;
3249 			ERROR_MATCH(EACCES, ISC_R_NOPERM);
3250 			ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
3251 			ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
3252 			ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED);
3253 			ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH);
3255 			ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH);
3257 			ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH);
3258 			ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES);
3259 			ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH);
3260 			ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED);
3261 			ERROR_MATCH(ETIMEDOUT, ISC_R_TIMEDOUT);
3262 			ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET);
/* Unknown errno: log peer address and error text, report UNEXPECTED. */
3265 			dev->result = ISC_R_UNEXPECTED;
3266 			isc_sockaddr_format(&sock->address, peerbuf,
3268 			isc__strerror(errno, strbuf, sizeof(strbuf));
3269 			UNEXPECTED_ERROR(__FILE__, __LINE__,
3270 					 "internal_connect: connect(%s) %s",
3274 		dev->result = ISC_R_SUCCESS;
3275 		sock->connected = 1;
3279 	sock->connect_ev = NULL;
3281 	UNLOCK(&sock->lock);
/*
 * Restore ev_sender to the socket (it was overloaded with the task)
 * and deliver the completion event, dropping our task reference.
 */
3283 	task = dev->ev_sender;
3284 	dev->ev_sender = sock;
3285 	isc_task_sendanddetach(&task, ISC_EVENT_PTR(&dev));
/*
 * isc_socket_getpeername():  copy the cached peer address
 * (sock->address, recorded at connect time) into *addressp.
 * Returns ISC_R_SUCCESS when connected, ISC_R_NOTCONNECTED
 * otherwise.  Note this does not call getpeername(2); it relies on
 * the address stashed by isc_socket_connect().
 */
3289 isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3292 	REQUIRE(VALID_SOCKET(sock));
3293 	REQUIRE(addressp != NULL);
3297 	if (sock->connected) {
3298 		*addressp = sock->address;
3299 		ret = ISC_R_SUCCESS;
3301 		ret = ISC_R_NOTCONNECTED;
3304 	UNLOCK(&sock->lock);
/*
 * isc_socket_getsockname():  fetch the socket's local address via
 * getsockname(2) into *addressp.  Returns ISC_R_NOTBOUND for an
 * unbound socket, ISC_R_UNEXPECTED (with the errno text logged) on
 * syscall failure, and ISC_R_SUCCESS otherwise, with
 * addressp->length set from the kernel-reported length.
 */
3310 isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3311 	ISC_SOCKADDR_LEN_T len;
3313 	char strbuf[ISC_STRERRORSIZE];
3315 	REQUIRE(VALID_SOCKET(sock));
3316 	REQUIRE(addressp != NULL);
/* Unbound socket has no meaningful local name. */
3321 		ret = ISC_R_NOTBOUND;
3325 	ret = ISC_R_SUCCESS;
3327 	len = sizeof(addressp->type);
3328 	if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) {
3329 		isc__strerror(errno, strbuf, sizeof(strbuf));
3330 		UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s",
3332 		ret = ISC_R_UNEXPECTED;
3335 	addressp->length = (unsigned int)len;
3338 	UNLOCK(&sock->lock);
3344 * Run through the list of events on this socket, and cancel the ones
3345 * queued for task "task" of type "how". "how" is a bitmask.
/*
 * isc_socket_cancel():  cancel pending I/O on "sock" for "task"
 * (NULL task means every task), for the operation types selected by
 * the "how" bitmask (RECV, SEND, ACCEPT, CONNECT).
 *
 * For recv/send the queued events get result ISC_R_CANCELED and are
 * posted via send_recvdone_event()/send_senddone_event().  For
 * accept, the event is unlinked, the pre-allocated child socket's
 * reference is dropped and the socket freed, then the canceled event
 * is sent with sendanddetach().  Connect is a single slot
 * (sock->connect_ev), not a list.
 */
3348 isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
3350 	REQUIRE(VALID_SOCKET(sock));
3353 	 * Quick exit if there is nothing to do.  Don't even bother locking
3362 	 * All of these do the same thing, more or less.
3364 	 *	o If the internal event is marked as "posted" try to
3365 	 *	  remove it from the task's queue.  If this fails, mark it
3366 	 *	  as canceled instead, and let the task clean it up later.
3367 	 *	o For each I/O request for that task of that type, post
3368 	 *	  its done event with status of "ISC_R_CANCELED".
3369 	 *	o Reset any state needed.
3371 	if (((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV)
3372 	    && !ISC_LIST_EMPTY(sock->recv_list)) {
3373 		isc_socketevent_t      *dev;
3374 		isc_socketevent_t      *next;
3375 		isc_task_t	       *current_task;
3377 		dev = ISC_LIST_HEAD(sock->recv_list);
/* "next" is captured first: send_recvdone_event() unlinks dev. */
3379 		while (dev != NULL) {
3380 			current_task = dev->ev_sender;
3381 			next = ISC_LIST_NEXT(dev, ev_link);
3383 			if ((task == NULL) || (task == current_task)) {
3384 				dev->result = ISC_R_CANCELED;
3385 				send_recvdone_event(sock, &dev);
/* Same walk as the recv list, using the send completion helper. */
3391 	if (((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND)
3392 	    && !ISC_LIST_EMPTY(sock->send_list)) {
3393 		isc_socketevent_t      *dev;
3394 		isc_socketevent_t      *next;
3395 		isc_task_t	       *current_task;
3397 		dev = ISC_LIST_HEAD(sock->send_list);
3399 		while (dev != NULL) {
3400 			current_task = dev->ev_sender;
3401 			next = ISC_LIST_NEXT(dev, ev_link);
3403 			if ((task == NULL) || (task == current_task)) {
3404 				dev->result = ISC_R_CANCELED;
3405 				send_senddone_event(sock, &dev);
3411 	if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT)
3412 	    && !ISC_LIST_EMPTY(sock->accept_list)) {
3413 		isc_socket_newconnev_t *dev;
3414 		isc_socket_newconnev_t *next;
3415 		isc_task_t	       *current_task;
3417 		dev = ISC_LIST_HEAD(sock->accept_list);
3418 		while (dev != NULL) {
3419 			current_task = dev->ev_sender;
3420 			next = ISC_LIST_NEXT(dev, ev_link);
3422 			if ((task == NULL) || (task == current_task)) {
3424 				ISC_LIST_UNLINK(sock->accept_list, dev,
/* Release the pre-allocated child socket that will never be used. */
3427 				dev->newsocket->references--;
3428 				free_socket(&dev->newsocket);
3430 				dev->result = ISC_R_CANCELED;
3431 				dev->ev_sender = sock;
/*
 * NOTE(review): "¤t_task" below is mojibake — the HTML entity
 * "&curren;" substituted for "&current_task".  Restore the original
 * "&current_task" when re-syncing with upstream source.
 */
3432 				isc_task_sendanddetach(¤t_task,
3433 						       ISC_EVENT_PTR(&dev));
3441 	 * Connecting is not a list.
3443 	if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT)
3444 	    && sock->connect_ev != NULL) {
3445 		isc_socket_connev_t    *dev;
3446 		isc_task_t	       *current_task;
3448 		INSIST(sock->connecting);
3449 		sock->connecting = 0;
3451 		dev = sock->connect_ev;
3452 		current_task = dev->ev_sender;
3454 		if ((task == NULL) || (task == current_task)) {
3455 			sock->connect_ev = NULL;
3457 			dev->result = ISC_R_CANCELED;
3458 			dev->ev_sender = sock;
/* NOTE(review): same "&curren;" mojibake for "&current_task" here. */
3459 			isc_task_sendanddetach(¤t_task,
3460 					       ISC_EVENT_PTR(&dev));
3464 	UNLOCK(&sock->lock);
/*
 * isc_socket_gettype():  return the socket's type (udp/tcp/...).
 * No lock: sock->type is set at creation and never changes.
 */
3468 isc_socket_gettype(isc_socket_t *sock) {
3469 	REQUIRE(VALID_SOCKET(sock));
3471 	return (sock->type);
/*
 * isc_socket_isbound():  report whether the socket has been bound,
 * as an isc_boolean_t, read under the socket lock.
 */
3475 isc_socket_isbound(isc_socket_t *sock) {
3479 	val = ((sock->bound) ? ISC_TRUE : ISC_FALSE);
3480 	UNLOCK(&sock->lock);
/*
 * isc_socket_ipv6only():  set or clear the IPV6_V6ONLY option on an
 * AF_INET6 socket (restricting it to IPv6 traffic only).  A no-op
 * on non-IPv6 sockets and on platforms without IPV6_V6ONLY; the
 * setsockopt() result is deliberately ignored (best effort).
 */
3486 isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) {
3487 #if defined(IPV6_V6ONLY)
3488 	int onoff = yes ? 1 : 0;
3494 	REQUIRE(VALID_SOCKET(sock));
3497 	if (sock->pf == AF_INET6) {
3498 		(void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY,
3499 				 (void *)&onoff, sizeof(onoff));
3504 #ifndef ISC_PLATFORM_USETHREADS
/*
 * isc__socketmgr_getfdsets():  non-threaded build only.  Export the
 * singleton manager's read/write fd_sets and maxfd (already +1, as
 * select(2) expects) so the application's own event loop can select
 * on them.  Presumably returns ISC_R_NOTFOUND when no manager
 * exists — the early-return line is missing from this excerpt.
 */
3506 isc__socketmgr_getfdsets(fd_set *readset, fd_set *writeset, int *maxfd) {
3507 	if (socketmgr == NULL)
3510 	*readset = socketmgr->read_fds;
3511 	*writeset = socketmgr->write_fds;
3512 	*maxfd = socketmgr->maxfd + 1;
/*
 * isc__socketmgr_dispatch():  non-threaded build only.  Process the
 * fds the application's select() reported ready, via process_fds().
 * Returns ISC_R_NOTFOUND if no manager exists, else ISC_R_SUCCESS.
 */
3517 isc__socketmgr_dispatch(fd_set *readset, fd_set *writeset, int maxfd) {
3518 	isc_socketmgr_t *manager = socketmgr;
3520 	if (manager == NULL)
3521 		return (ISC_R_NOTFOUND);
3523 	process_fds(manager, maxfd, readset, writeset);
3524 	return (ISC_R_SUCCESS);
3526 #endif /* ISC_PLATFORM_USETHREADS */