3 * Bill Paul <wpaul@windriver.com>. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by Bill Paul.
16 * 4. Neither the name of the author nor the names of any co-contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30 * THE POSSIBILITY OF SUCH DAMAGE.
32 * $FreeBSD: src/sys/compat/ndis/subr_ntoskrnl.c,v 1.117 2012/11/17 01:51:26 svnexp Exp $
35 #include <sys/ctype.h>
36 #include <sys/unistd.h>
37 #include <sys/param.h>
38 #include <sys/types.h>
39 #include <sys/errno.h>
40 #include <sys/systm.h>
41 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/mutex2.h>
46 #include <sys/callout.h>
47 #include <sys/kernel.h>
49 #include <sys/condvar.h>
50 #include <sys/kthread.h>
51 #include <sys/module.h>
52 #include <sys/sched.h>
53 #include <sys/sysctl.h>
55 #include <machine/atomic.h>
59 #include <sys/objcache.h>
62 #include <vm/vm_param.h>
64 #include <vm/vm_kern.h>
65 #include <vm/vm_map.h>
66 #include <vm/vm_extern.h>
68 #include <emulation/ndis/pe_var.h>
69 #include <emulation/ndis/cfg_var.h>
70 #include <emulation/ndis/resource_var.h>
71 #include <emulation/ndis/ntoskrnl_var.h>
72 #include <emulation/ndis/hal_var.h>
73 #include <emulation/ndis/ndis_var.h>
75 #include <machine/stdarg.h>
77 #ifdef NTOSKRNL_DEBUG_TIMERS
78 static int sysctl_show_timers(SYSCTL_HANDLER_ARGS);
80 SYSCTL_PROC(_debug, OID_AUTO, ntoskrnl_timers, CTLTYPE_INT | CTLFLAG_RW,
81 NULL, 0, sysctl_show_timers, "I",
82 "Show ntoskrnl timer stats");
96 typedef struct kdpc_queue kdpc_queue;
100 struct thread *we_td;
103 typedef struct wb_ext wb_ext;
105 #define NTOSKRNL_TIMEOUTS 256
106 #ifdef NTOSKRNL_DEBUG_TIMERS
107 static uint64_t ntoskrnl_timer_fires;
108 static uint64_t ntoskrnl_timer_sets;
109 static uint64_t ntoskrnl_timer_reloads;
110 static uint64_t ntoskrnl_timer_cancels;
113 struct callout_entry {
114 struct callout ce_callout;
118 typedef struct callout_entry callout_entry;
120 static struct list_entry ntoskrnl_calllist;
121 static struct mtx ntoskrnl_calllock;
122 struct kuser_shared_data kuser_shared_data;
124 static struct list_entry ntoskrnl_intlist;
125 static kspin_lock ntoskrnl_intlock;
127 static uint8_t RtlEqualUnicodeString(unicode_string *,
128 unicode_string *, uint8_t);
129 static void RtlCopyString(ansi_string *, const ansi_string *);
130 static void RtlCopyUnicodeString(unicode_string *,
132 static irp *IoBuildSynchronousFsdRequest(uint32_t, device_object *,
133 void *, uint32_t, uint64_t *, nt_kevent *, io_status_block *);
134 static irp *IoBuildAsynchronousFsdRequest(uint32_t,
135 device_object *, void *, uint32_t, uint64_t *, io_status_block *);
136 static irp *IoBuildDeviceIoControlRequest(uint32_t,
137 device_object *, void *, uint32_t, void *, uint32_t,
138 uint8_t, nt_kevent *, io_status_block *);
139 static irp *IoAllocateIrp(uint8_t, uint8_t);
140 static void IoReuseIrp(irp *, uint32_t);
141 static void IoFreeIrp(irp *);
142 static void IoInitializeIrp(irp *, uint16_t, uint8_t);
143 static irp *IoMakeAssociatedIrp(irp *, uint8_t);
144 static uint32_t KeWaitForMultipleObjects(uint32_t,
145 nt_dispatch_header **, uint32_t, uint32_t, uint32_t, uint8_t,
146 int64_t *, wait_block *);
147 static void ntoskrnl_waittest(nt_dispatch_header *, uint32_t);
148 static void ntoskrnl_satisfy_wait(nt_dispatch_header *, struct thread *);
149 static void ntoskrnl_satisfy_multiple_waits(wait_block *);
150 static int ntoskrnl_is_signalled(nt_dispatch_header *, struct thread *);
151 static void ntoskrnl_insert_timer(ktimer *, int);
152 static void ntoskrnl_remove_timer(ktimer *);
153 #ifdef NTOSKRNL_DEBUG_TIMERS
154 static void ntoskrnl_show_timers(void);
156 static void ntoskrnl_timercall(void *);
157 static void ntoskrnl_dpc_thread(void *);
158 static void ntoskrnl_destroy_dpc_threads(void);
159 static void ntoskrnl_destroy_workitem_threads(void);
160 static void ntoskrnl_workitem_thread(void *);
161 static void ntoskrnl_workitem(device_object *, void *);
162 static void ntoskrnl_unicode_to_ascii(uint16_t *, char *, int);
163 static void ntoskrnl_ascii_to_unicode(char *, uint16_t *, int);
164 static uint8_t ntoskrnl_insert_dpc(list_entry *, kdpc *);
165 static void WRITE_REGISTER_USHORT(uint16_t *, uint16_t);
166 static uint16_t READ_REGISTER_USHORT(uint16_t *);
167 static void WRITE_REGISTER_ULONG(uint32_t *, uint32_t);
168 static uint32_t READ_REGISTER_ULONG(uint32_t *);
169 static void WRITE_REGISTER_UCHAR(uint8_t *, uint8_t);
170 static uint8_t READ_REGISTER_UCHAR(uint8_t *);
171 static int64_t _allmul(int64_t, int64_t);
172 static int64_t _alldiv(int64_t, int64_t);
173 static int64_t _allrem(int64_t, int64_t);
174 static int64_t _allshr(int64_t, uint8_t);
175 static int64_t _allshl(int64_t, uint8_t);
176 static uint64_t _aullmul(uint64_t, uint64_t);
177 static uint64_t _aulldiv(uint64_t, uint64_t);
178 static uint64_t _aullrem(uint64_t, uint64_t);
179 static uint64_t _aullshr(uint64_t, uint8_t);
180 static uint64_t _aullshl(uint64_t, uint8_t);
181 static slist_entry *ntoskrnl_pushsl(slist_header *, slist_entry *);
182 static void InitializeSListHead(slist_header *);
183 static slist_entry *ntoskrnl_popsl(slist_header *);
184 static void ExFreePoolWithTag(void *, uint32_t);
185 static void ExInitializePagedLookasideList(paged_lookaside_list *,
186 lookaside_alloc_func *, lookaside_free_func *,
187 uint32_t, size_t, uint32_t, uint16_t);
188 static void ExDeletePagedLookasideList(paged_lookaside_list *);
189 static void ExInitializeNPagedLookasideList(npaged_lookaside_list *,
190 lookaside_alloc_func *, lookaside_free_func *,
191 uint32_t, size_t, uint32_t, uint16_t);
192 static void ExDeleteNPagedLookasideList(npaged_lookaside_list *);
194 *ExInterlockedPushEntrySList(slist_header *,
195 slist_entry *, kspin_lock *);
197 *ExInterlockedPopEntrySList(slist_header *, kspin_lock *);
198 static uint32_t InterlockedIncrement(volatile uint32_t *);
199 static uint32_t InterlockedDecrement(volatile uint32_t *);
200 static void ExInterlockedAddLargeStatistic(uint64_t *, uint32_t);
201 static void *MmAllocateContiguousMemory(uint32_t, uint64_t);
202 static void *MmAllocateContiguousMemorySpecifyCache(uint32_t,
203 uint64_t, uint64_t, uint64_t, enum nt_caching_type);
204 static void MmFreeContiguousMemory(void *);
205 static void MmFreeContiguousMemorySpecifyCache(void *, uint32_t,
206 enum nt_caching_type);
207 static uint32_t MmSizeOfMdl(void *, size_t);
208 static void *MmMapLockedPages(mdl *, uint8_t);
209 static void *MmMapLockedPagesSpecifyCache(mdl *,
210 uint8_t, uint32_t, void *, uint32_t, uint32_t);
211 static void MmUnmapLockedPages(void *, mdl *);
212 static device_t ntoskrnl_finddev(device_t, uint64_t, struct resource **);
213 static void RtlZeroMemory(void *, size_t);
214 static void RtlSecureZeroMemory(void *, size_t);
215 static void RtlFillMemory(void *, size_t, uint8_t);
216 static void RtlMoveMemory(void *, const void *, size_t);
217 static ndis_status RtlCharToInteger(const char *, uint32_t, uint32_t *);
218 static void RtlCopyMemory(void *, const void *, size_t);
219 static size_t RtlCompareMemory(const void *, const void *, size_t);
220 static ndis_status RtlUnicodeStringToInteger(unicode_string *,
221 uint32_t, uint32_t *);
222 static int atoi (const char *);
223 static long atol (const char *);
224 static int rand(void);
225 static void srand(unsigned int);
226 static void KeQuerySystemTime(uint64_t *);
227 static uint32_t KeTickCount(void);
228 static uint8_t IoIsWdmVersionAvailable(uint8_t, uint8_t);
229 static int32_t IoOpenDeviceRegistryKey(struct device_object *, uint32_t,
231 static void ntoskrnl_thrfunc(void *);
232 static ndis_status PsCreateSystemThread(ndis_handle *,
233 uint32_t, void *, ndis_handle, void *, void *, void *);
234 static ndis_status PsTerminateSystemThread(ndis_status);
235 static ndis_status IoGetDeviceObjectPointer(unicode_string *,
236 uint32_t, void *, device_object *);
237 static ndis_status IoGetDeviceProperty(device_object *, uint32_t,
238 uint32_t, void *, uint32_t *);
239 static void KeInitializeMutex(kmutant *, uint32_t);
240 static uint32_t KeReleaseMutex(kmutant *, uint8_t);
241 static uint32_t KeReadStateMutex(kmutant *);
242 static ndis_status ObReferenceObjectByHandle(ndis_handle,
243 uint32_t, void *, uint8_t, void **, void **);
244 static void ObfDereferenceObject(void *);
245 static uint32_t ZwClose(ndis_handle);
246 static uint32_t WmiQueryTraceInformation(uint32_t, void *, uint32_t,
248 static uint32_t WmiTraceMessage(uint64_t, uint32_t, void *, uint16_t, ...);
249 static uint32_t IoWMIRegistrationControl(device_object *, uint32_t);
250 static void *ntoskrnl_memset(void *, int, size_t);
251 static void *ntoskrnl_memmove(void *, void *, size_t);
252 static void *ntoskrnl_memchr(void *, unsigned char, size_t);
253 static char *ntoskrnl_strstr(char *, char *);
254 static char *ntoskrnl_strncat(char *, char *, size_t);
255 static int ntoskrnl_toupper(int);
256 static int ntoskrnl_tolower(int);
257 static funcptr ntoskrnl_findwrap(funcptr);
258 static uint32_t DbgPrint(char *, ...) __printflike(1, 2);
259 static void DbgBreakPoint(void);
260 static void KeBugCheckEx(uint32_t, u_long, u_long, u_long, u_long);
261 static int32_t KeDelayExecutionThread(uint8_t, uint8_t, int64_t *);
262 static int32_t KeSetPriorityThread(struct thread *, int32_t);
263 static void dummy(void);
265 static struct lock ntoskrnl_dispatchlock;
266 static struct mtx ntoskrnl_interlock;
267 static kspin_lock ntoskrnl_cancellock;
268 static int ntoskrnl_kth = 0;
269 static struct nt_objref_head ntoskrnl_reflist;
270 static struct objcache *mdl_cache;
271 static struct objcache *iw_cache;
272 static struct kdpc_queue *kq_queues;
273 static struct kdpc_queue *wq_queues;
274 static int wq_idx = 0;
276 static struct objcache_malloc_args mdl_alloc_args = {
277 MDL_ZONE_SIZE, M_DEVBUF
279 static struct objcache_malloc_args iw_alloc_args = {
280 sizeof(io_workitem), M_DEVBUF
284 ntoskrnl_libinit(void)
286 image_patch_table *patch;
293 lockinit(&ntoskrnl_dispatchlock, MTX_NDIS_LOCK, 0, LK_CANRECURSE);
294 mtx_init(&ntoskrnl_interlock, "ndis1");
295 KeInitializeSpinLock(&ntoskrnl_cancellock);
296 KeInitializeSpinLock(&ntoskrnl_intlock);
297 TAILQ_INIT(&ntoskrnl_reflist);
299 InitializeListHead(&ntoskrnl_calllist);
300 InitializeListHead(&ntoskrnl_intlist);
301 mtx_init(&ntoskrnl_calllock, "ndis2");
303 kq_queues = ExAllocatePoolWithTag(NonPagedPool,
304 #ifdef NTOSKRNL_MULTIPLE_DPCS
305 sizeof(kdpc_queue) * ncpus, 0);
307 sizeof(kdpc_queue), 0);
310 if (kq_queues == NULL)
313 wq_queues = ExAllocatePoolWithTag(NonPagedPool,
314 sizeof(kdpc_queue) * WORKITEM_THREADS, 0);
316 if (wq_queues == NULL)
319 #ifdef NTOSKRNL_MULTIPLE_DPCS
320 bzero((char *)kq_queues, sizeof(kdpc_queue) * ncpus);
322 bzero((char *)kq_queues, sizeof(kdpc_queue));
324 bzero((char *)wq_queues, sizeof(kdpc_queue) * WORKITEM_THREADS);
327 * Launch the DPC threads.
330 #ifdef NTOSKRNL_MULTIPLE_DPCS
331 for (i = 0; i < ncpus; i++) {
333 for (i = 0; i < 1; i++) {
337 error = kthread_create_cpu(ntoskrnl_dpc_thread, kq, &p, i,
340 panic("failed to launch DPC thread");
344 * Launch the workitem threads.
347 for (i = 0; i < WORKITEM_THREADS; i++) {
349 error = kthread_create(ntoskrnl_workitem_thread, kq, &p,
350 "Win Workitem %d", i);
352 panic("failed to launch workitem thread");
355 patch = ntoskrnl_functbl;
356 while (patch->ipt_func != NULL) {
357 windrv_wrap((funcptr)patch->ipt_func,
358 (funcptr *)&patch->ipt_wrap,
359 patch->ipt_argcnt, patch->ipt_ftype);
363 for (i = 0; i < NTOSKRNL_TIMEOUTS; i++) {
364 e = ExAllocatePoolWithTag(NonPagedPool,
365 sizeof(callout_entry), 0);
367 panic("failed to allocate timeouts");
368 mtx_spinlock(&ntoskrnl_calllock);
369 InsertHeadList((&ntoskrnl_calllist), (&e->ce_list));
370 mtx_spinunlock(&ntoskrnl_calllock);
374 * MDLs are supposed to be variable size (they describe
375 * buffers containing some number of pages, but we don't
376 * know ahead of time how many pages that will be). But
377 * always allocating them off the heap is very slow. As
378 * a compromise, we create an MDL UMA zone big enough to
379 * handle any buffer requiring up to 16 pages, and we
380 * use those for any MDLs for buffers of 16 pages or less
381 * in size. For buffers larger than that (which we assume
382 * will be few and far between, we allocate the MDLs off
385 * CHANGED TO USING objcache(9) IN DRAGONFLY
388 mdl_cache = objcache_create("Windows MDL", 0, 0,
389 NULL, NULL, NULL, objcache_malloc_alloc, objcache_malloc_free,
392 iw_cache = objcache_create("Windows WorkItem", 0, 0,
393 NULL, NULL, NULL, objcache_malloc_alloc, objcache_malloc_free,
400 ntoskrnl_libfini(void)
402 image_patch_table *patch;
406 patch = ntoskrnl_functbl;
407 while (patch->ipt_func != NULL) {
408 windrv_unwrap(patch->ipt_wrap);
412 /* Stop the workitem queues. */
413 ntoskrnl_destroy_workitem_threads();
414 /* Stop the DPC queues. */
415 ntoskrnl_destroy_dpc_threads();
417 ExFreePool(kq_queues);
418 ExFreePool(wq_queues);
420 objcache_destroy(mdl_cache);
421 objcache_destroy(iw_cache);
423 mtx_spinlock(&ntoskrnl_calllock);
424 while(!IsListEmpty(&ntoskrnl_calllist)) {
425 l = RemoveHeadList(&ntoskrnl_calllist);
426 e = CONTAINING_RECORD(l, callout_entry, ce_list);
427 mtx_spinunlock(&ntoskrnl_calllock);
429 mtx_spinlock(&ntoskrnl_calllock);
431 mtx_spinunlock(&ntoskrnl_calllock);
433 lockuninit(&ntoskrnl_dispatchlock);
434 mtx_uninit(&ntoskrnl_interlock);
435 mtx_uninit(&ntoskrnl_calllock);
441 * We need to be able to reference this externally from the wrapper;
442 * GCC only generates a local implementation of memset.
/*
 * ntoskrnl_memset: externally referenceable memset() wrapper.
 *
 * Windows binaries resolve memset through the ntoskrnl export table, so
 * we must provide a real out-of-line function rather than relying on a
 * compiler-generated local expansion.  Returns `buf`, per the C
 * memset() contract.
 */
static void *
ntoskrnl_memset(void *buf, int ch, size_t size)
{
	return (memset(buf, ch, size));
}
/*
 * ntoskrnl_memmove: exported overlap-safe copy for Windows drivers.
 *
 * Implemented on bcopy(), which tolerates overlapping regions; the
 * destination pointer is returned to match the C memmove() contract.
 */
static void *
ntoskrnl_memmove(void *dst, void *src, size_t size)
{
	bcopy(src, dst, size);
	return (dst);
}
458 ntoskrnl_memchr(void *buf, unsigned char ch, size_t len)
461 unsigned char *p = buf;
466 } while (--len != 0);
/*
 * ntoskrnl_strstr: exported strstr() equivalent (classic libc
 * algorithm).  Returns a pointer to the first occurrence of `find` in
 * `s`, `s` itself when `find` is empty, or NULL when not found.
 */
static char *
ntoskrnl_strstr(char *s, char *find)
{
	char c, sc;
	size_t len;

	if ((c = *find++) != 0) {
		len = strlen(find);
		do {
			/* Advance to the next occurrence of the first char. */
			do {
				if ((sc = *s++) == 0)
					return (NULL);
			} while (sc != c);
		} while (strncmp(s, find, len) != 0);
		s--;
	}
	return (s);
}
490 /* Taken from libc */
/*
 * ntoskrnl_strncat: exported strncat() equivalent (libc algorithm).
 *
 * Appends at most `n` characters of `src` to the end of `dst` and
 * always NUL-terminates the result.  Returns `dst`.  The caller must
 * guarantee the destination buffer is large enough.
 */
static char *
ntoskrnl_strncat(char *dst, char *src, size_t n)
{
	if (n != 0) {
		char *d = dst;
		const char *s = src;

		/* Find the current end of dst. */
		while (*d != 0)
			d++;
		do {
			if ((*d = *s++) == 0)
				break;
			d++;
		} while (--n != 0);
		*d = 0;
	}
	return (dst);
}
/*
 * ntoskrnl_toupper: exported toupper() wrapper so Windows drivers can
 * resolve the symbol from the ntoskrnl export table.
 */
static int
ntoskrnl_toupper(int c)
{
	return (toupper(c));
}
/*
 * ntoskrnl_tolower: exported tolower() wrapper so Windows drivers can
 * resolve the symbol from the ntoskrnl export table.
 */
static int
ntoskrnl_tolower(int c)
{
	return (tolower(c));
}
523 RtlEqualUnicodeString(unicode_string *str1, unicode_string *str2,
524 uint8_t caseinsensitive)
528 if (str1->us_len != str2->us_len)
531 for (i = 0; i < str1->us_len; i++) {
532 if (caseinsensitive == TRUE) {
533 if (toupper((char)(str1->us_buf[i] & 0xFF)) !=
534 toupper((char)(str2->us_buf[i] & 0xFF)))
537 if (str1->us_buf[i] != str2->us_buf[i])
546 RtlCopyString(ansi_string *dst, const ansi_string *src)
548 if (src != NULL && src->as_buf != NULL && dst->as_buf != NULL) {
549 dst->as_len = min(src->as_len, dst->as_maxlen);
550 memcpy(dst->as_buf, src->as_buf, dst->as_len);
551 if (dst->as_len < dst->as_maxlen)
552 dst->as_buf[dst->as_len] = 0;
558 RtlCopyUnicodeString(unicode_string *dest, unicode_string *src)
561 if (dest->us_maxlen >= src->us_len)
562 dest->us_len = src->us_len;
564 dest->us_len = dest->us_maxlen;
565 memcpy(dest->us_buf, src->us_buf, dest->us_len);
/*
 * ntoskrnl_ascii_to_unicode: widen `len` ASCII characters into 16-bit
 * characters.  The destination must hold at least `len` uint16_t slots;
 * no terminator is written.
 */
static void
ntoskrnl_ascii_to_unicode(char *ascii, uint16_t *unicode, int len)
{
	int i;

	for (i = 0; i < len; i++)
		unicode[i] = (uint16_t)ascii[i];
}
/*
 * ntoskrnl_unicode_to_ascii: narrow a 16-bit character string to ASCII.
 *
 * `len` is the size of the source in BYTES, so len / 2 characters are
 * converted; the high byte of each character is discarded and no
 * terminator is written.
 */
static void
ntoskrnl_unicode_to_ascii(uint16_t *unicode, char *ascii, int len)
{
	int i;

	for (i = 0; i < len / 2; i++)
		ascii[i] = (uint8_t)unicode[i];
}
595 RtlUnicodeStringToAnsiString(ansi_string *dest, unicode_string *src, uint8_t allocate)
597 if (dest == NULL || src == NULL)
598 return (STATUS_INVALID_PARAMETER);
600 dest->as_len = src->us_len / 2;
601 if (dest->as_maxlen < dest->as_len)
602 dest->as_len = dest->as_maxlen;
604 if (allocate == TRUE) {
605 dest->as_buf = ExAllocatePoolWithTag(NonPagedPool,
606 (src->us_len / 2) + 1, 0);
607 if (dest->as_buf == NULL)
608 return (STATUS_INSUFFICIENT_RESOURCES);
609 dest->as_len = dest->as_maxlen = src->us_len / 2;
611 dest->as_len = src->us_len / 2; /* XXX */
612 if (dest->as_maxlen < dest->as_len)
613 dest->as_len = dest->as_maxlen;
616 ntoskrnl_unicode_to_ascii(src->us_buf, dest->as_buf,
619 return (STATUS_SUCCESS);
623 RtlAnsiStringToUnicodeString(unicode_string *dest, ansi_string *src,
626 if (dest == NULL || src == NULL)
627 return (STATUS_INVALID_PARAMETER);
629 if (allocate == TRUE) {
630 dest->us_buf = ExAllocatePoolWithTag(NonPagedPool,
632 if (dest->us_buf == NULL)
633 return (STATUS_INSUFFICIENT_RESOURCES);
634 dest->us_len = dest->us_maxlen = strlen(src->as_buf) * 2;
636 dest->us_len = src->as_len * 2; /* XXX */
637 if (dest->us_maxlen < dest->us_len)
638 dest->us_len = dest->us_maxlen;
641 ntoskrnl_ascii_to_unicode(src->as_buf, dest->us_buf,
644 return (STATUS_SUCCESS);
648 ExAllocatePoolWithTag(uint32_t pooltype, size_t len, uint32_t tag)
652 buf = kmalloc(len, M_DEVBUF, M_NOWAIT|M_ZERO);
660 ExFreePoolWithTag(void *buf, uint32_t tag)
666 ExFreePool(void *buf)
668 kfree(buf, M_DEVBUF);
672 IoAllocateDriverObjectExtension(driver_object *drv, void *clid,
673 uint32_t extlen, void **ext)
675 custom_extension *ce;
677 ce = ExAllocatePoolWithTag(NonPagedPool, sizeof(custom_extension)
681 return (STATUS_INSUFFICIENT_RESOURCES);
684 InsertTailList((&drv->dro_driverext->dre_usrext), (&ce->ce_list));
686 *ext = (void *)(ce + 1);
688 return (STATUS_SUCCESS);
692 IoGetDriverObjectExtension(driver_object *drv, void *clid)
695 custom_extension *ce;
698 * Sanity check. Our dummy bus drivers don't have
699 * any driver extentions.
702 if (drv->dro_driverext == NULL)
705 e = drv->dro_driverext->dre_usrext.nle_flink;
706 while (e != &drv->dro_driverext->dre_usrext) {
707 ce = (custom_extension *)e;
708 if (ce->ce_clid == clid)
709 return ((void *)(ce + 1));
718 IoCreateDevice(driver_object *drv, uint32_t devextlen, unicode_string *devname,
719 uint32_t devtype, uint32_t devchars, uint8_t exclusive,
720 device_object **newdev)
724 dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device_object), 0);
726 return (STATUS_INSUFFICIENT_RESOURCES);
728 dev->do_type = devtype;
729 dev->do_drvobj = drv;
730 dev->do_currirp = NULL;
734 dev->do_devext = ExAllocatePoolWithTag(NonPagedPool,
737 if (dev->do_devext == NULL) {
739 return (STATUS_INSUFFICIENT_RESOURCES);
742 bzero(dev->do_devext, devextlen);
744 dev->do_devext = NULL;
746 dev->do_size = sizeof(device_object) + devextlen;
748 dev->do_attacheddev = NULL;
749 dev->do_nextdev = NULL;
750 dev->do_devtype = devtype;
751 dev->do_stacksize = 1;
752 dev->do_alignreq = 1;
753 dev->do_characteristics = devchars;
754 dev->do_iotimer = NULL;
755 KeInitializeEvent(&dev->do_devlock, EVENT_TYPE_SYNC, TRUE);
758 * Vpd is used for disk/tape devices,
759 * but we don't support those. (Yet.)
763 dev->do_devobj_ext = ExAllocatePoolWithTag(NonPagedPool,
764 sizeof(devobj_extension), 0);
766 if (dev->do_devobj_ext == NULL) {
767 if (dev->do_devext != NULL)
768 ExFreePool(dev->do_devext);
770 return (STATUS_INSUFFICIENT_RESOURCES);
773 dev->do_devobj_ext->dve_type = 0;
774 dev->do_devobj_ext->dve_size = sizeof(devobj_extension);
775 dev->do_devobj_ext->dve_devobj = dev;
778 * Attach this device to the driver object's list
779 * of devices. Note: this is not the same as attaching
780 * the device to the device stack. The driver's AddDevice
781 * routine must explicitly call IoAddDeviceToDeviceStack()
785 if (drv->dro_devobj == NULL) {
786 drv->dro_devobj = dev;
787 dev->do_nextdev = NULL;
789 dev->do_nextdev = drv->dro_devobj;
790 drv->dro_devobj = dev;
795 return (STATUS_SUCCESS);
799 IoDeleteDevice(device_object *dev)
806 if (dev->do_devobj_ext != NULL)
807 ExFreePool(dev->do_devobj_ext);
809 if (dev->do_devext != NULL)
810 ExFreePool(dev->do_devext);
812 /* Unlink the device from the driver's device list. */
814 prev = dev->do_drvobj->dro_devobj;
816 dev->do_drvobj->dro_devobj = dev->do_nextdev;
818 while (prev->do_nextdev != dev)
819 prev = prev->do_nextdev;
820 prev->do_nextdev = dev->do_nextdev;
827 IoGetAttachedDevice(device_object *dev)
836 while (d->do_attacheddev != NULL)
837 d = d->do_attacheddev;
843 IoBuildSynchronousFsdRequest(uint32_t func, device_object *dobj, void *buf,
844 uint32_t len, uint64_t *off, nt_kevent *event, io_status_block *status)
848 ip = IoBuildAsynchronousFsdRequest(func, dobj, buf, len, off, status);
851 ip->irp_usrevent = event;
857 IoBuildAsynchronousFsdRequest(uint32_t func, device_object *dobj, void *buf,
858 uint32_t len, uint64_t *off, io_status_block *status)
861 io_stack_location *sl;
863 ip = IoAllocateIrp(dobj->do_stacksize, TRUE);
867 ip->irp_usriostat = status;
868 ip->irp_tail.irp_overlay.irp_thread = NULL;
870 sl = IoGetNextIrpStackLocation(ip);
871 sl->isl_major = func;
875 sl->isl_devobj = dobj;
876 sl->isl_fileobj = NULL;
877 sl->isl_completionfunc = NULL;
879 ip->irp_userbuf = buf;
881 if (dobj->do_flags & DO_BUFFERED_IO) {
882 ip->irp_assoc.irp_sysbuf =
883 ExAllocatePoolWithTag(NonPagedPool, len, 0);
884 if (ip->irp_assoc.irp_sysbuf == NULL) {
888 bcopy(buf, ip->irp_assoc.irp_sysbuf, len);
891 if (dobj->do_flags & DO_DIRECT_IO) {
892 ip->irp_mdl = IoAllocateMdl(buf, len, FALSE, FALSE, ip);
893 if (ip->irp_mdl == NULL) {
894 if (ip->irp_assoc.irp_sysbuf != NULL)
895 ExFreePool(ip->irp_assoc.irp_sysbuf);
899 ip->irp_userbuf = NULL;
900 ip->irp_assoc.irp_sysbuf = NULL;
903 if (func == IRP_MJ_READ) {
904 sl->isl_parameters.isl_read.isl_len = len;
906 sl->isl_parameters.isl_read.isl_byteoff = *off;
908 sl->isl_parameters.isl_read.isl_byteoff = 0;
911 if (func == IRP_MJ_WRITE) {
912 sl->isl_parameters.isl_write.isl_len = len;
914 sl->isl_parameters.isl_write.isl_byteoff = *off;
916 sl->isl_parameters.isl_write.isl_byteoff = 0;
923 IoBuildDeviceIoControlRequest(uint32_t iocode, device_object *dobj, void *ibuf,
924 uint32_t ilen, void *obuf, uint32_t olen, uint8_t isinternal,
925 nt_kevent *event, io_status_block *status)
928 io_stack_location *sl;
931 ip = IoAllocateIrp(dobj->do_stacksize, TRUE);
934 ip->irp_usrevent = event;
935 ip->irp_usriostat = status;
936 ip->irp_tail.irp_overlay.irp_thread = NULL;
938 sl = IoGetNextIrpStackLocation(ip);
939 sl->isl_major = isinternal == TRUE ?
940 IRP_MJ_INTERNAL_DEVICE_CONTROL : IRP_MJ_DEVICE_CONTROL;
944 sl->isl_devobj = dobj;
945 sl->isl_fileobj = NULL;
946 sl->isl_completionfunc = NULL;
947 sl->isl_parameters.isl_ioctl.isl_iocode = iocode;
948 sl->isl_parameters.isl_ioctl.isl_ibuflen = ilen;
949 sl->isl_parameters.isl_ioctl.isl_obuflen = olen;
951 switch(IO_METHOD(iocode)) {
952 case METHOD_BUFFERED:
958 ip->irp_assoc.irp_sysbuf =
959 ExAllocatePoolWithTag(NonPagedPool, buflen, 0);
960 if (ip->irp_assoc.irp_sysbuf == NULL) {
965 if (ilen && ibuf != NULL) {
966 bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen);
967 bzero((char *)ip->irp_assoc.irp_sysbuf + ilen,
970 bzero(ip->irp_assoc.irp_sysbuf, ilen);
971 ip->irp_userbuf = obuf;
973 case METHOD_IN_DIRECT:
974 case METHOD_OUT_DIRECT:
975 if (ilen && ibuf != NULL) {
976 ip->irp_assoc.irp_sysbuf =
977 ExAllocatePoolWithTag(NonPagedPool, ilen, 0);
978 if (ip->irp_assoc.irp_sysbuf == NULL) {
982 bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen);
984 if (olen && obuf != NULL) {
985 ip->irp_mdl = IoAllocateMdl(obuf, olen,
988 * Normally we would MmProbeAndLockPages()
989 * here, but we don't have to in our
995 ip->irp_userbuf = obuf;
996 sl->isl_parameters.isl_ioctl.isl_type3ibuf = ibuf;
1003 * Ideally, we should associate this IRP with the calling
1011 IoAllocateIrp(uint8_t stsize, uint8_t chargequota)
1015 i = ExAllocatePoolWithTag(NonPagedPool, IoSizeOfIrp(stsize), 0);
1019 IoInitializeIrp(i, IoSizeOfIrp(stsize), stsize);
1025 IoMakeAssociatedIrp(irp *ip, uint8_t stsize)
1029 associrp = IoAllocateIrp(stsize, FALSE);
1030 if (associrp == NULL)
1033 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
1034 associrp->irp_flags |= IRP_ASSOCIATED_IRP;
1035 associrp->irp_tail.irp_overlay.irp_thread =
1036 ip->irp_tail.irp_overlay.irp_thread;
1037 associrp->irp_assoc.irp_master = ip;
1038 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1050 IoInitializeIrp(irp *io, uint16_t psize, uint8_t ssize)
1052 bzero((char *)io, IoSizeOfIrp(ssize));
1053 io->irp_size = psize;
1054 io->irp_stackcnt = ssize;
1055 io->irp_currentstackloc = ssize;
1056 InitializeListHead(&io->irp_thlist);
1057 io->irp_tail.irp_overlay.irp_csl =
1058 (io_stack_location *)(io + 1) + ssize;
1062 IoReuseIrp(irp *ip, uint32_t status)
1066 allocflags = ip->irp_allocflags;
1067 IoInitializeIrp(ip, ip->irp_size, ip->irp_stackcnt);
1068 ip->irp_iostat.isb_status = status;
1069 ip->irp_allocflags = allocflags;
1073 IoAcquireCancelSpinLock(uint8_t *irql)
1075 KeAcquireSpinLock(&ntoskrnl_cancellock, irql);
1079 IoReleaseCancelSpinLock(uint8_t irql)
1081 KeReleaseSpinLock(&ntoskrnl_cancellock, irql);
/*
 * IoCancelIrp: attempt to cancel a pending IRP.
 *
 * Takes the global cancel spin lock, atomically detaches the IRP's
 * cancel routine (so it can run at most once) and marks the IRP
 * cancelled.  When no cancel routine is registered the lock is dropped
 * and the early-out path (interior lines missing from this chunk) is
 * taken; otherwise the driver's cancel routine is invoked with the
 * Windows (MSCALL) calling convention — by Windows contract that
 * routine is responsible for releasing the cancel spin lock, which is
 * why the acquired IRQL is stashed in irp_cancelirql first.
 *
 * NOTE(review): this chunk is a sampled extraction with interior lines
 * missing; comments describe only what the visible statements show.
 */
1085 IoCancelIrp(irp *ip)
1090 IoAcquireCancelSpinLock(&cancelirql);
/* Fetch-and-clear: ensures the cancel routine fires at most once. */
1091 cfunc = IoSetCancelRoutine(ip, NULL);
1092 ip->irp_cancel = TRUE;
1093 if (cfunc == NULL) {
1094 IoReleaseCancelSpinLock(cancelirql);
/* The cancel routine must release the cancel lock at this IRQL. */
1097 ip->irp_cancelirql = cancelirql;
1098 MSCALL2(cfunc, IoGetCurrentIrpStackLocation(ip)->isl_devobj, ip);
1099 return (uint8_t)IoSetCancelValue(ip, TRUE);
/*
 * IofCallDriver: pass an IRP down to the driver owning `dobj`.
 *
 * Advances the IRP to its next stack location, stamps that location
 * with the target device object, looks up the owning driver's dispatch
 * routine for the location's major function code, and invokes it via
 * the Windows (MSCALL) calling convention.
 *
 * NOTE(review): interior lines are missing from this sampled chunk; the
 * final `return (status)` is presumed but not visible here.
 */
1103 IofCallDriver(device_object *dobj, irp *ip)
1105 driver_object *drvobj;
1106 io_stack_location *sl;
1108 driver_dispatch disp;
1110 drvobj = dobj->do_drvobj;
/* Exhausting the stack locations is a driver bug: fail loudly. */
1112 if (ip->irp_currentstackloc <= 0)
1113 panic("IoCallDriver(): out of stack locations");
1115 IoSetNextIrpStackLocation(ip);
1116 sl = IoGetCurrentIrpStackLocation(ip);
1118 sl->isl_devobj = dobj;
/* Dispatch indexed by major function (IRP_MJ_*). */
1120 disp = drvobj->dro_dispatch[sl->isl_major];
1121 status = MSCALL2(disp, dobj, ip);
1127 IofCompleteRequest(irp *ip, uint8_t prioboost)
1130 device_object *dobj;
1131 io_stack_location *sl;
1134 KASSERT(ip->irp_iostat.isb_status != STATUS_PENDING,
1135 ("incorrect IRP(%p) status (STATUS_PENDING)", ip));
1137 sl = IoGetCurrentIrpStackLocation(ip);
1138 IoSkipCurrentIrpStackLocation(ip);
1141 if (sl->isl_ctl & SL_PENDING_RETURNED)
1142 ip->irp_pendingreturned = TRUE;
1144 if (ip->irp_currentstackloc != (ip->irp_stackcnt + 1))
1145 dobj = IoGetCurrentIrpStackLocation(ip)->isl_devobj;
1149 if (sl->isl_completionfunc != NULL &&
1150 ((ip->irp_iostat.isb_status == STATUS_SUCCESS &&
1151 sl->isl_ctl & SL_INVOKE_ON_SUCCESS) ||
1152 (ip->irp_iostat.isb_status != STATUS_SUCCESS &&
1153 sl->isl_ctl & SL_INVOKE_ON_ERROR) ||
1154 (ip->irp_cancel == TRUE &&
1155 sl->isl_ctl & SL_INVOKE_ON_CANCEL))) {
1156 cf = sl->isl_completionfunc;
1157 status = MSCALL3(cf, dobj, ip, sl->isl_completionctx);
1158 if (status == STATUS_MORE_PROCESSING_REQUIRED)
1161 if ((ip->irp_currentstackloc <= ip->irp_stackcnt) &&
1162 (ip->irp_pendingreturned == TRUE))
1163 IoMarkIrpPending(ip);
1166 /* move to the next. */
1167 IoSkipCurrentIrpStackLocation(ip);
1169 } while (ip->irp_currentstackloc <= (ip->irp_stackcnt + 1));
1171 if (ip->irp_usriostat != NULL)
1172 *ip->irp_usriostat = ip->irp_iostat;
1173 if (ip->irp_usrevent != NULL)
1174 KeSetEvent(ip->irp_usrevent, prioboost, FALSE);
1176 /* Handle any associated IRPs. */
1178 if (ip->irp_flags & IRP_ASSOCIATED_IRP) {
1179 uint32_t masterirpcnt;
1183 masterirp = ip->irp_assoc.irp_master;
1185 InterlockedDecrement(&masterirp->irp_assoc.irp_irpcnt);
1187 while ((m = ip->irp_mdl) != NULL) {
1188 ip->irp_mdl = m->mdl_next;
1192 if (masterirpcnt == 0)
1193 IoCompleteRequest(masterirp, IO_NO_INCREMENT);
1197 /* With any luck, these conditions will never arise. */
1199 if (ip->irp_flags & IRP_PAGING_IO) {
1200 if (ip->irp_mdl != NULL)
1201 IoFreeMdl(ip->irp_mdl);
1207 ntoskrnl_intr(void *arg)
1214 KeAcquireSpinLock(&ntoskrnl_intlock, &irql);
1215 l = ntoskrnl_intlist.nle_flink;
1216 while (l != &ntoskrnl_intlist) {
1217 iobj = CONTAINING_RECORD(l, kinterrupt, ki_list);
1218 claimed = MSCALL2(iobj->ki_svcfunc, iobj, iobj->ki_svcctx);
1219 if (claimed == TRUE)
1223 KeReleaseSpinLock(&ntoskrnl_intlock, irql);
1227 KeAcquireInterruptSpinLock(kinterrupt *iobj)
1230 KeAcquireSpinLock(&ntoskrnl_intlock, &irql);
1235 KeReleaseInterruptSpinLock(kinterrupt *iobj, uint8_t irql)
1237 KeReleaseSpinLock(&ntoskrnl_intlock, irql);
1241 KeSynchronizeExecution(kinterrupt *iobj, void *syncfunc, void *syncctx)
1245 KeAcquireSpinLock(&ntoskrnl_intlock, &irql);
1246 MSCALL1(syncfunc, syncctx);
1247 KeReleaseSpinLock(&ntoskrnl_intlock, irql);
1253 * IoConnectInterrupt() is passed only the interrupt vector and
1254 * irql that a device wants to use, but no device-specific tag
1255 * of any kind. This conflicts rather badly with FreeBSD's
1256 * bus_setup_intr(), which needs the device_t for the device
1257 * requesting interrupt delivery. In order to bypass this
1258 * inconsistency, we implement a second level of interrupt
1259 * dispatching on top of bus_setup_intr(). All devices use
1260 * ntoskrnl_intr() as their ISR, and any device requesting
1261 * interrupts will be registered with ntoskrnl_intr()'s interrupt
1262 * dispatch list. When an interrupt arrives, we walk the list
1263 * and invoke all the registered ISRs. This effectively makes all
1264 * interrupts shared, but it's the only way to duplicate the
1265 * semantics of IoConnectInterrupt() and IoDisconnectInterrupt() properly.
1269 IoConnectInterrupt(kinterrupt **iobj, void *svcfunc, void *svcctx,
1270 kspin_lock *lock, uint32_t vector, uint8_t irql, uint8_t syncirql,
1271 uint8_t imode, uint8_t shared, uint32_t affinity, uint8_t savefloat)
1275 *iobj = ExAllocatePoolWithTag(NonPagedPool, sizeof(kinterrupt), 0);
1277 return (STATUS_INSUFFICIENT_RESOURCES);
1279 (*iobj)->ki_svcfunc = svcfunc;
1280 (*iobj)->ki_svcctx = svcctx;
1283 KeInitializeSpinLock(&(*iobj)->ki_lock_priv);
1284 (*iobj)->ki_lock = &(*iobj)->ki_lock_priv;
1286 (*iobj)->ki_lock = lock;
1288 KeAcquireSpinLock(&ntoskrnl_intlock, &curirql);
1289 InsertHeadList((&ntoskrnl_intlist), (&(*iobj)->ki_list));
1290 KeReleaseSpinLock(&ntoskrnl_intlock, curirql);
1292 return (STATUS_SUCCESS);
1296 IoDisconnectInterrupt(kinterrupt *iobj)
1303 KeAcquireSpinLock(&ntoskrnl_intlock, &irql);
1304 RemoveEntryList((&iobj->ki_list));
1305 KeReleaseSpinLock(&ntoskrnl_intlock, irql);
1311 IoAttachDeviceToDeviceStack(device_object *src, device_object *dst)
1313 device_object *attached;
1315 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
1316 attached = IoGetAttachedDevice(dst);
1317 attached->do_attacheddev = src;
1318 src->do_attacheddev = NULL;
1319 src->do_stacksize = attached->do_stacksize + 1;
1320 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/*
 * IoDetachDevice: detach the device stacked immediately below `topdev`.
 *
 * Under the dispatcher lock, unlinks the next device in the attachment
 * chain, drops topdev's reference count, and then walks the remaining
 * chain decrementing each device's stack-size count.
 *
 * NOTE(review): this sampled chunk is missing interior lines (e.g. the
 * empty-chain early-out between the unlock at orig line 1335 and the
 * relink at 1338); comments cover only the visible statements.
 */
1326 IoDetachDevice(device_object *topdev)
1328 device_object *tail;
1330 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
1332 /* First, break the chain. */
1333 tail = topdev->do_attacheddev;
1335 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1338 topdev->do_attacheddev = tail->do_attacheddev;
1339 topdev->do_refcnt--;
1341 /* Now reduce the stacksize count for the remaining (tail) objects. */
1343 tail = topdev->do_attacheddev;
1344 while (tail != NULL) {
1345 tail->do_stacksize--;
1346 tail = tail->do_attacheddev;
1349 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1353 * For the most part, an object is considered signalled if
1354 * dh_sigstate == TRUE. The exception is for mutant objects
1355 * (mutexes), where the logic works like this:
1357 * - If the thread already owns the object and sigstate is
1358 * less than or equal to 0, then the object is considered
1359 * signalled (recursive acquisition).
1360 * - If dh_sigstate == 1, the object is also considered
/*
 * Predicate: is 'obj' signalled with respect to thread 'td'?
 * The TRUE/FALSE returns for each branch are on elided lines.
 */
1365 ntoskrnl_is_signalled(nt_dispatch_header *obj, struct thread *td)
1369 if (obj->dh_type == DISP_TYPE_MUTANT) {
1370 km = (kmutant *)obj;
1371 if ((obj->dh_sigstate <= 0 && km->km_ownerthread == td) ||
1372 obj->dh_sigstate == 1)
/* Non-mutant objects: signalled iff sigstate is positive. */
1377 if (obj->dh_sigstate > 0)
/*
 * Consume one "signal" from 'obj' on behalf of 'td'.  Mutants transfer
 * ownership; synchronization events/timers auto-clear; semaphores have
 * their count decremented (the decrement itself is on elided lines).
 * Called with the dispatcher lock held.
 */
1383 ntoskrnl_satisfy_wait(nt_dispatch_header *obj, struct thread *td)
1387 switch (obj->dh_type) {
1388 case DISP_TYPE_MUTANT:
1389 km = (struct kmutant *)obj;
1392 * If sigstate reaches 0, the mutex is now
1393 * non-signalled (the new thread owns it).
1395 if (obj->dh_sigstate == 0) {
1396 km->km_ownerthread = td;
1397 if (km->km_abandoned == TRUE)
1398 km->km_abandoned = FALSE;
1401 /* Synchronization objects get reset to unsignalled. */
1402 case DISP_TYPE_SYNCHRONIZATION_EVENT:
1403 case DISP_TYPE_SYNCHRONIZATION_TIMER:
1404 obj->dh_sigstate = 0;
1406 case DISP_TYPE_SEMAPHORE:
/*
 * Walk the circular wb_next list rooted at 'wb' (a WAITTYPE_ALL wait set)
 * and satisfy every member's object, marking each wait block awakened.
 * The 'cur' initialization and advance are on elided lines.
 */
1415 ntoskrnl_satisfy_multiple_waits(wait_block *wb)
1421 td = wb->wb_kthread;
1424 ntoskrnl_satisfy_wait(wb->wb_object, td);
1425 cur->wb_awakened = TRUE;
1427 } while (cur != wb);
1430 /* Always called with dispatcher lock held. */
/*
 * Wake threads whose waits are now satisfiable by the freshly-signalled
 * object 'obj'.  'increment' is the Windows priority boost applied
 * (inverted here because BSD priorities grow downward).
 */
1432 ntoskrnl_waittest(nt_dispatch_header *obj, uint32_t increment)
1434 wait_block *w, *next;
1441 * Once an object has been signalled, we walk its list of
1442 * wait blocks. If a wait block can be awakened, then satisfy
1443 * waits as necessary and wake the thread.
1445 * The rules work like this:
1447 * If a wait block is marked as WAITTYPE_ANY, then
1448 * we can satisfy the wait conditions on the current
1449 * object and wake the thread right away. Satisfying
1450 * the wait also has the effect of breaking us out
1451 * of the search loop.
1453 * If the object is marked as WAITTYPE_ALL, then the
1454 * wait block will be part of a circularly linked
1455 * list of wait blocks belonging to a waiting thread
1456 * that's sleeping in KeWaitForMultipleObjects(). In
1457 * order to wake the thread, all the objects in the
1458 * wait list must be in the signalled state. If they
1459 * are, we then satisfy all of them and wake the
1464 e = obj->dh_waitlisthead.nle_flink;
/* Stop as soon as the object runs out of signals to hand out. */
1466 while (e != &obj->dh_waitlisthead && obj->dh_sigstate > 0) {
1467 w = CONTAINING_RECORD(e, wait_block, wb_waitlist);
1471 if (w->wb_waittype == WAITTYPE_ANY) {
1473 * Thread can be awakened if
1474 * any wait is satisfied.
1476 ntoskrnl_satisfy_wait(obj, td);
1478 w->wb_awakened = TRUE;
1481 * Thread can only be woken up
1482 * if all waits are satisfied.
1483 * If the thread is waiting on multiple
1484 * objects, they should all be linked
1485 * through the wb_next pointers in the
/* WAITTYPE_ALL: any still-unsignalled member vetoes the wakeup. */
1491 if (ntoskrnl_is_signalled(obj, td) == FALSE) {
1495 next = next->wb_next;
1497 ntoskrnl_satisfy_multiple_waits(w);
/* Wake at the boosted priority, clamped to PRI_MIN_KERN. */
1500 if (satisfied == TRUE)
1501 cv_broadcastpri(&we->we_cv,
1502 (w->wb_oldpri - (increment * 4)) > PRI_MIN_KERN ?
1503 w->wb_oldpri - (increment * 4) : PRI_MIN_KERN);
1510 * Return the number of 100 nanosecond intervals since
1511 * January 1, 1601. (?!?!)
/* Convert the current wall clock into Windows FILETIME-style ticks. */
1514 ntoskrnl_time(uint64_t *tval)
1519 *tval = (uint64_t)ts.tv_nsec / 100 + (uint64_t)ts.tv_sec * 10000000 +
1520 11644473600 * 10000000; /* 100ns ticks from 1601 to 1970 */
/* KeQuerySystemTime: thin wrapper over ntoskrnl_time(). */
1524 KeQuerySystemTime(uint64_t *current_time)
1526 ntoskrnl_time(current_time);
/*
 * NOTE(review): the two lines below belong to a separate, elided function
 * (its signature was lost in extraction; presumably a tick-count helper
 * returning uptime in hz ticks -- confirm against upstream).
 */
1533 getmicrouptime(&tv);
1534 return tvtohz_high(&tv);
1539 * KeWaitForSingleObject() is a tricky beast, because it can be used
1540 * with several different object types: semaphores, timers, events,
1541 * mutexes and threads. Semaphores don't appear very often, but the
1542 * other object types are quite common. KeWaitForSingleObject() is
1543 * what's normally used to acquire a mutex, and it can be used to
1544 * wait for a thread termination.
1546 * The Windows NDIS API is implemented in terms of Windows kernel
1547 * primitives, and some of the object manipulation is duplicated in
1548 * NDIS. For example, NDIS has timers and events, which are actually
1549 * Windows kevents and ktimers. Now, you're supposed to only use the
1550 * NDIS variants of these objects within the confines of the NDIS API,
1551 * but there are some naughty developers out there who will use
1552 * KeWaitForSingleObject() on NDIS timer and event objects, so we
1553 * have to support that as well. Consequently, our NDIS timer and event
1554 * code has to be closely tied into our ntoskrnl timer and event code,
1555 * just as it is in Windows.
1557 * KeWaitForSingleObject() may do different things for different kinds
1560 * - For events, we check if the event has been signalled. If the
1561 * event is already in the signalled state, we just return immediately,
1562 * otherwise we wait for it to be set to the signalled state by someone
1563 * else calling KeSetEvent(). Events can be either synchronization or
1564 * notification events.
1566 * - For timers, if the timer has already fired and the timer is in
1567 * the signalled state, we just return, otherwise we wait on the
1568 * timer. Unlike an event, timers get signalled automatically when
1569 * they expire rather than someone having to trip them manually.
1570 * Timers initialized with KeInitializeTimer() are always notification
1571 * events: KeInitializeTimerEx() lets you initialize a timer as
1572 * either a notification or synchronization event.
1574 * - For mutexes, we try to acquire the mutex and if we can't, we wait
1575 * on the mutex until it's available and then grab it. When a mutex is
1576 * released, it enters the signalled state, which wakes up one of the
1577 * threads waiting to acquire it. Mutexes are always synchronization
1580 * - For threads, the only thing we do is wait until the thread object
1581 * enters a signalled state, which occurs when the thread terminates.
1582 * Threads are always notification events.
1584 * A notification event wakes up all threads waiting on an object. A
1585 * synchronization event wakes up just one. Also, a synchronization event
1586 * is auto-clearing, which means we automatically set the event back to
1587 * the non-signalled state once the wakeup is done.
/*
 * KeWaitForSingleObject: block until the dispatcher object 'arg' becomes
 * signalled, optionally bounded by *duetime (100ns units; negative =
 * relative, positive = absolute since 1601).  Returns STATUS_SUCCESS,
 * STATUS_TIMEOUT, or STATUS_INVALID_PARAMETER.  See the long comment
 * above for per-object-type semantics.
 * NOTE(review): partial extraction -- obj assignment, wait-block setup
 * lines, and some control-flow framing are missing; the trailing
 * KeWaitForMultipleObjects() delegation (orig lines 1693-4) cannot be
 * placed with confidence.  Verify against upstream before editing.
 */
1591 KeWaitForSingleObject(void *arg, uint32_t reason, uint32_t mode,
1592 uint8_t alertable, int64_t *duetime)
1595 struct thread *td = curthread;
1600 nt_dispatch_header *obj;
1605 return (STATUS_INVALID_PARAMETER);
1607 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
1609 cv_init(&we.we_cv, "KeWFS");
1613 * Check to see if this object is already signalled,
1614 * and just return without waiting if it is.
1616 if (ntoskrnl_is_signalled(obj, td) == TRUE) {
1617 /* Sanity check the signal state value. */
1618 if (obj->dh_sigstate != INT32_MIN) {
1619 ntoskrnl_satisfy_wait(obj, curthread);
1620 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1621 return (STATUS_SUCCESS);
1624 * There's a limit to how many times we can
1625 * recursively acquire a mutant. If we hit
1626 * the limit, something is very wrong.
1628 if (obj->dh_type == DISP_TYPE_MUTANT) {
1629 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1630 panic("mutant limit exceeded");
/* Not signalled: build a wait block and queue ourselves on the object. */
1635 bzero((char *)&w, sizeof(wait_block));
1638 w.wb_waittype = WAITTYPE_ANY;
1641 w.wb_awakened = FALSE;
1642 w.wb_oldpri = td->td_pri;
1644 InsertTailList((&obj->dh_waitlisthead), (&w.wb_waitlist));
1647 * The timeout value is specified in 100 nanosecond units
1648 * and can be a positive or negative number. If it's positive,
1649 * then the duetime is absolute, and we need to convert it
1650 * to an absolute offset relative to now in order to use it.
1651 * If it's negative, then the duetime is relative and we
1652 * just have to convert the units.
1655 if (duetime != NULL) {
1657 tv.tv_sec = - (*duetime) / 10000000;
1658 tv.tv_usec = (- (*duetime) / 10) -
1659 (tv.tv_sec * 1000000);
1661 ntoskrnl_time(&curtime);
1662 if (*duetime < curtime)
1663 tv.tv_sec = tv.tv_usec = 0;
1665 tv.tv_sec = ((*duetime) - curtime) / 10000000;
1666 tv.tv_usec = ((*duetime) - curtime) / 10 -
1667 (tv.tv_sec * 1000000);
/* Sleep on our condvar; ntoskrnl_waittest() broadcasts it on signal. */
1672 if (duetime == NULL)
1673 cv_wait(&we.we_cv, &ntoskrnl_dispatchlock);
1675 error = cv_timedwait(&we.we_cv,
1676 &ntoskrnl_dispatchlock, tvtohz_high(&tv));
1678 RemoveEntryList(&w.wb_waitlist);
1680 cv_destroy(&we.we_cv);
1682 /* We timed out. Leave the object alone and return status. */
1684 if (error == EWOULDBLOCK) {
1685 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1686 return (STATUS_TIMEOUT);
1689 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1691 return (STATUS_SUCCESS);
1693 return (KeWaitForMultipleObjects(1, &obj, WAITTYPE_ALL, reason,
1694 mode, alertable, duetime, &w));
/*
 * KeWaitForMultipleObjects: wait for any (WAITTYPE_ANY) or all
 * (WAITTYPE_ALL) of 'cnt' dispatcher objects, optionally bounded by
 * *duetime.  Returns STATUS_WAIT_0 + index of the satisfying object for
 * ANY waits, STATUS_SUCCESS for ALL waits, STATUS_TIMEOUT, or
 * STATUS_INVALID_PARAMETER.  Callers waiting on more than
 * THREAD_WAIT_OBJECTS objects must supply wb_array.
 * NOTE(review): partial extraction -- several loop/branch framing lines
 * and variable updates are missing; do not edit without the full source.
 */
1699 KeWaitForMultipleObjects(uint32_t cnt, nt_dispatch_header *obj[], uint32_t wtype,
1700 uint32_t reason, uint32_t mode, uint8_t alertable, int64_t *duetime,
1701 wait_block *wb_array)
1703 struct thread *td = curthread;
1704 wait_block *whead, *w;
1705 wait_block _wb_array[MAX_WAIT_OBJECTS];
1706 nt_dispatch_header *cur;
1708 int i, wcnt = 0, error = 0;
1710 struct timespec t1, t2;
1711 uint32_t status = STATUS_SUCCESS;
/* Parameter validation before taking the dispatcher lock. */
1714 if (cnt > MAX_WAIT_OBJECTS)
1715 return (STATUS_INVALID_PARAMETER);
1716 if (cnt > THREAD_WAIT_OBJECTS && wb_array == NULL)
1717 return (STATUS_INVALID_PARAMETER);
1719 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
1721 cv_init(&we.we_cv, "KeWFM");
/* Small waits use the on-stack array instead of a caller-supplied one. */
1724 if (wb_array == NULL)
1729 bzero((char *)whead, sizeof(wait_block) * cnt);
1731 /* First pass: see if we can satisfy any waits immediately. */
1736 for (i = 0; i < cnt; i++) {
1737 InsertTailList((&obj[i]->dh_waitlisthead),
1740 w->wb_object = obj[i];
1741 w->wb_waittype = wtype;
1743 w->wb_awakened = FALSE;
1744 w->wb_oldpri = td->td_pri;
1748 if (ntoskrnl_is_signalled(obj[i], td)) {
1750 * There's a limit to how many times
1751 * we can recursively acquire a mutant.
1752 * If we hit the limit, something
1755 if (obj[i]->dh_sigstate == INT32_MIN &&
1756 obj[i]->dh_type == DISP_TYPE_MUTANT) {
1757 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1758 panic("mutant limit exceeded");
1762 * If this is a WAITTYPE_ANY wait, then
1763 * satisfy the waited object and exit
1767 if (wtype == WAITTYPE_ANY) {
1768 ntoskrnl_satisfy_wait(obj[i], td);
1769 status = STATUS_WAIT_0 + i;
/* Already signalled: this slot needs no wait block after all. */
1774 w->wb_object = NULL;
1775 RemoveEntryList(&w->wb_waitlist);
1781 * If this is a WAITTYPE_ALL wait and all objects are
1782 * already signalled, satisfy the waits and exit now.
1785 if (wtype == WAITTYPE_ALL && wcnt == 0) {
1786 for (i = 0; i < cnt; i++)
1787 ntoskrnl_satisfy_wait(obj[i], td);
1788 status = STATUS_SUCCESS;
1793 * Create a circular waitblock list. The waitcount
1794 * must always be non-zero when we get here.
1797 (w - 1)->wb_next = whead;
1799 /* Wait on any objects that aren't yet signalled. */
1801 /* Calculate timeout, if any. */
1803 if (duetime != NULL) {
1805 tv.tv_sec = - (*duetime) / 10000000;
1806 tv.tv_usec = (- (*duetime) / 10) -
1807 (tv.tv_sec * 1000000);
1809 ntoskrnl_time(&curtime);
1810 if (*duetime < curtime)
1811 tv.tv_sec = tv.tv_usec = 0;
1813 tv.tv_sec = ((*duetime) - curtime) / 10000000;
1814 tv.tv_usec = ((*duetime) - curtime) / 10 -
1815 (tv.tv_sec * 1000000);
1823 if (duetime == NULL)
1824 cv_wait(&we.we_cv, &ntoskrnl_dispatchlock);
1826 error = cv_timedwait(&we.we_cv,
1827 &ntoskrnl_dispatchlock, tvtohz_high(&tv));
1829 /* Wait with timeout expired. */
1832 status = STATUS_TIMEOUT;
1838 /* See what's been signalled. */
1843 if (ntoskrnl_is_signalled(cur, td) == TRUE ||
1844 w->wb_awakened == TRUE) {
1845 /* Sanity check the signal state value. */
1846 if (cur->dh_sigstate == INT32_MIN &&
1847 cur->dh_type == DISP_TYPE_MUTANT) {
1848 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1849 panic("mutant limit exceeded");
1852 if (wtype == WAITTYPE_ANY) {
1853 status = w->wb_waitkey &
1859 } while (w != whead);
1862 * If all objects have been signalled, or if this
1863 * is a WAITTYPE_ANY wait and we were woken up by
1864 * someone, we can bail.
1868 status = STATUS_SUCCESS;
1873 * If this is WAITTYPE_ALL wait, and there's still
1874 * objects that haven't been signalled, deduct the
1875 * time that's elapsed so far from the timeout and
1876 * wait again (or continue waiting indefinitely if
1877 * there's no timeout).
1880 if (duetime != NULL) {
1881 tv.tv_sec -= (t2.tv_sec - t1.tv_sec);
1882 tv.tv_usec -= (t2.tv_nsec - t1.tv_nsec) / 1000;
/* Common exit: tear down the condvar and any leftover wait blocks. */
1889 cv_destroy(&we.we_cv);
1891 for (i = 0; i < cnt; i++) {
1892 if (whead[i].wb_object != NULL)
1893 RemoveEntryList(&whead[i].wb_waitlist);
1896 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/*
 * Memory-mapped register accessors: Windows drivers call these instead of
 * dereferencing device memory directly; we forward to bus_space with a
 * zero base tag and the register pointer as the offset.
 */
1902 WRITE_REGISTER_USHORT(uint16_t *reg, uint16_t val)
1904 bus_space_write_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val);
1908 READ_REGISTER_USHORT(uint16_t *reg)
1910 return (bus_space_read_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg));
1914 WRITE_REGISTER_ULONG(uint32_t *reg, uint32_t val)
1916 bus_space_write_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val);
1920 READ_REGISTER_ULONG(uint32_t *reg)
1922 return (bus_space_read_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg));
1926 READ_REGISTER_UCHAR(uint8_t *reg)
1928 return (bus_space_read_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg));
1932 WRITE_REGISTER_UCHAR(uint8_t *reg, uint8_t val)
1934 bus_space_write_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val);
/*
 * MSVC 64-bit integer runtime helpers (_allmul/_alldiv/... are emitted by
 * the Microsoft compiler for 64-bit arithmetic on x86).  Only the
 * signatures survived extraction; each body is a one-line C expression
 * (e.g. return (a * b)) in the full source.
 */
1938 _allmul(int64_t a, int64_t b)
1944 _alldiv(int64_t a, int64_t b)
1950 _allrem(int64_t a, int64_t b)
1956 _aullmul(uint64_t a, uint64_t b)
1962 _aulldiv(uint64_t a, uint64_t b)
1968 _aullrem(uint64_t a, uint64_t b)
1974 _allshl(int64_t a, uint8_t b)
1980 _aullshl(uint64_t a, uint8_t b)
1986 _allshr(int64_t a, uint8_t b)
1992 _aullshr(uint64_t a, uint8_t b)
/*
 * Unsynchronized singly-linked ("SList") primitives.  Callers are
 * responsible for locking (see the Interlocked* wrappers below).
 */
1997 static slist_entry *
/* Push 'entry' at the head; returns the previous head (return elided). */
1998 ntoskrnl_pushsl(slist_header *head, slist_entry *entry)
2000 slist_entry *oldhead;
2002 oldhead = head->slh_list.slh_next;
2003 entry->sl_next = head->slh_list.slh_next;
2004 head->slh_list.slh_next = entry;
2005 head->slh_list.slh_depth++;
2006 head->slh_list.slh_seq++;
/* InitializeSListHead: zero the header (empty list, depth 0, seq 0). */
2012 InitializeSListHead(slist_header *head)
2014 memset(head, 0, sizeof(*head));
2017 static slist_entry *
/* Pop and return the head entry, or NULL if the list is empty. */
2018 ntoskrnl_popsl(slist_header *head)
2022 first = head->slh_list.slh_next;
2023 if (first != NULL) {
2024 head->slh_list.slh_next = first->sl_next;
2025 head->slh_list.slh_depth--;
2026 head->slh_list.slh_seq++;
2033 * We need this to make lookaside lists work for amd64.
2034 * We pass a pointer to ExAllocatePoolWithTag() the lookaside
2035 * list structure. For amd64 to work right, this has to be a
2036 * pointer to the wrapped version of the routine, not the
2037 * original. Letting the Windows driver invoke the original
2038 * function directly will result in a convention calling
2039 * mismatch and a pretty crash. On x86, this effectively
2040 * becomes a no-op since ipt_func and ipt_wrap are the same.
/* Map a native function pointer to its MS-calling-convention wrapper. */
2044 ntoskrnl_findwrap(funcptr func)
2046 image_patch_table *patch;
2048 patch = ntoskrnl_functbl;
2049 while (patch->ipt_func != NULL) {
2050 if ((funcptr)patch->ipt_func == func)
2051 return ((funcptr)patch->ipt_wrap);
/*
 * Lookaside-list setup/teardown.  Paged and non-paged variants are
 * identical here because we have no paged pool; both default the
 * alloc/free hooks to the wrapped ExAllocatePoolWithTag/ExFreePool and
 * clamp element size to at least one slist_entry so freed elements can
 * be chained.
 */
2059 ExInitializePagedLookasideList(paged_lookaside_list *lookaside,
2060 lookaside_alloc_func *allocfunc, lookaside_free_func *freefunc,
2061 uint32_t flags, size_t size, uint32_t tag, uint16_t depth)
2063 bzero((char *)lookaside, sizeof(paged_lookaside_list))
2065 if (size < sizeof(slist_entry))
2066 lookaside->nll_l.gl_size = sizeof(slist_entry);
2068 lookaside->nll_l.gl_size = size;
2069 lookaside->nll_l.gl_tag = tag;
2070 if (allocfunc == NULL)
2071 lookaside->nll_l.gl_allocfunc =
2072 ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag);
2074 lookaside->nll_l.gl_allocfunc = allocfunc;
2076 if (freefunc == NULL)
2077 lookaside->nll_l.gl_freefunc =
2078 ntoskrnl_findwrap((funcptr)ExFreePool);
2080 lookaside->nll_l.gl_freefunc = freefunc;
2082 lookaside->nll_l.gl_type = NonPagedPool;
2083 lookaside->nll_l.gl_depth = depth;
2084 lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH;
/* Drain the cached elements through the list's free hook. */
2088 ExDeletePagedLookasideList(paged_lookaside_list *lookaside)
2091 void (*freefunc)(void *);
2093 freefunc = lookaside->nll_l.gl_freefunc;
2094 while((buf = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead)) != NULL)
2095 MSCALL1(freefunc, buf);
/* Non-paged variant: same logic as the paged initializer above. */
2099 ExInitializeNPagedLookasideList(npaged_lookaside_list *lookaside,
2100 lookaside_alloc_func *allocfunc, lookaside_free_func *freefunc,
2101 uint32_t flags, size_t size, uint32_t tag, uint16_t depth)
2103 bzero((char *)lookaside, sizeof(npaged_lookaside_list));
2105 if (size < sizeof(slist_entry))
2106 lookaside->nll_l.gl_size = sizeof(slist_entry);
2108 lookaside->nll_l.gl_size = size;
2109 lookaside->nll_l.gl_tag = tag;
2110 if (allocfunc == NULL)
2111 lookaside->nll_l.gl_allocfunc =
2112 ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag);
2114 lookaside->nll_l.gl_allocfunc = allocfunc;
2116 if (freefunc == NULL)
2117 lookaside->nll_l.gl_freefunc =
2118 ntoskrnl_findwrap((funcptr)ExFreePool);
2120 lookaside->nll_l.gl_freefunc = freefunc;
2122 lookaside->nll_l.gl_type = NonPagedPool;
2123 lookaside->nll_l.gl_depth = depth;
2124 lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH;
/* Drain the cached elements through the list's free hook. */
2128 ExDeleteNPagedLookasideList(npaged_lookaside_list *lookaside)
2131 void (*freefunc)(void *);
2133 freefunc = lookaside->nll_l.gl_freefunc;
2134 while((buf = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead)) != NULL)
2135 MSCALL1(freefunc, buf);
/*
 * Interlocked primitives: serialize the unsynchronized slist/arith
 * helpers with the global ntoskrnl_interlock spin mutex.
 */
2139 InterlockedPushEntrySList(slist_header *head, slist_entry *entry)
2141 slist_entry *oldhead;
2143 mtx_spinlock(&ntoskrnl_interlock);
2144 oldhead = ntoskrnl_pushsl(head, entry);
2145 mtx_spinunlock(&ntoskrnl_interlock);
2151 InterlockedPopEntrySList(slist_header *head)
2155 mtx_spinlock(&ntoskrnl_interlock);
2156 first = ntoskrnl_popsl(head);
2157 mtx_spinunlock(&ntoskrnl_interlock);
2162 static slist_entry *
/* Ex* variants ignore the caller's lock and reuse the global one. */
2163 ExInterlockedPushEntrySList(slist_header *head, slist_entry *entry,
2166 return (InterlockedPushEntrySList(head, entry));
2169 static slist_entry *
2170 ExInterlockedPopEntrySList(slist_header *head, kspin_lock *lock)
2172 return (InterlockedPopEntrySList(head));
2176 ExQueryDepthSList(slist_header *head)
2180 mtx_spinlock(&ntoskrnl_interlock);
2181 depth = head->slh_list.slh_depth;
2182 mtx_spinunlock(&ntoskrnl_interlock);
/* A kspin_lock is just a word: 0 = free, 1 = held. */
2188 KeInitializeSpinLock(kspin_lock *lock)
/* Spin (body elided) until we transition the word from 0 to 1. */
2194 KeAcquireSpinLockAtDpcLevel(kspin_lock *lock)
2196 while (atomic_cmpset_acq_int((volatile u_int *)lock, 0, 1) == 0)
2201 KeReleaseSpinLockFromDpcLevel(kspin_lock *lock)
2203 atomic_store_rel_int((volatile u_int *)lock, 0);
2207 InterlockedExchange(volatile uint32_t *dst, uintptr_t val)
2211 mtx_spinlock(&ntoskrnl_interlock);
2214 mtx_spinunlock(&ntoskrnl_interlock);
/*
 * NOTE(review): atomic_add_long/atomic_subtract_long operate on a u_long
 * through a pointer to a uint32_t object -- on LP64 that is an 8-byte
 * atomic against a 4-byte variable.  Looks like a width bug; confirm
 * against upstream (FreeBSD uses atomic_add_int here).
 */
2220 InterlockedIncrement(volatile uint32_t *addend)
2222 atomic_add_long((volatile u_long *)addend, 1);
2227 InterlockedDecrement(volatile uint32_t *addend)
2229 atomic_subtract_long((volatile u_long *)addend, 1);
2234 ExInterlockedAddLargeStatistic(uint64_t *addend, uint32_t inc)
2236 mtx_spinlock(&ntoskrnl_interlock);
2238 mtx_spinunlock(&ntoskrnl_interlock);
/*
 * IoAllocateMdl: build an MDL describing [vaddr, vaddr+len).  Small MDLs
 * come from the objcache zone, oversized ones from pool.  If 'iopkt' is
 * given, the MDL is attached to the IRP (chained when secondarybuf).
 * NOTE(review): several framing/assignment lines are elided.
 */
2242 IoAllocateMdl(void *vaddr, uint32_t len, uint8_t secondarybuf,
2243 uint8_t chargequota, irp *iopkt)
2248 if (MmSizeOfMdl(vaddr, len) > MDL_ZONE_SIZE)
2249 m = ExAllocatePoolWithTag(NonPagedPool,
2250 MmSizeOfMdl(vaddr, len), 0);
2252 m = objcache_get(mdl_cache, M_NOWAIT);
2253 bzero(m, sizeof(mdl));
2260 MmInitializeMdl(m, vaddr, len);
2263 * MmInitializeMdl() clears the flags field, so we
2264 * have to set this here. If the MDL came from the
2265 * MDL UMA zone, tag it so we can release it to
2266 * the right place later.
2269 m->mdl_flags = MDL_ZONE_ALLOCED;
2271 if (iopkt != NULL) {
2272 if (secondarybuf == TRUE) {
2274 last = iopkt->irp_mdl;
2275 while (last->mdl_next != NULL)
2276 last = last->mdl_next;
2279 if (iopkt->irp_mdl != NULL)
2280 panic("leaking an MDL in IoAllocateMdl()");
/*
 * NOTE(review): the two lines below are the tail of IoFreeMdl() (its
 * signature and pool-free branch were lost in extraction): zone-tagged
 * MDLs go back to the objcache.
 */
2294 if (m->mdl_flags & MDL_ZONE_ALLOCED)
2295 objcache_put(mdl_cache, m);
/*
 * MmAllocateContiguousMemory: page-rounded non-paged-pool allocation.
 * The 'highest' physical-address bound is not honored here.
 */
2301 MmAllocateContiguousMemory(uint32_t size, uint64_t highest)
2304 size_t pagelength = roundup(size, PAGE_SIZE);
2306 addr = ExAllocatePoolWithTag(NonPagedPool, pagelength, 0);
2311 #if 0 /* XXX swildner */
/* Disabled FreeBSD implementation using kmem_alloc_contig() + memattr. */
2313 MmAllocateContiguousMemorySpecifyCache(uint32_t size, uint64_t lowest,
2314 uint64_t highest, uint64_t boundary, enum nt_caching_type cachetype)
2316 vm_memattr_t memattr;
2319 switch (cachetype) {
2321 memattr = VM_MEMATTR_UNCACHEABLE;
2323 case MmWriteCombined:
2324 memattr = VM_MEMATTR_WRITE_COMBINING;
2326 case MmNonCachedUnordered:
2327 memattr = VM_MEMATTR_UNCACHEABLE;
2330 case MmHardwareCoherentCached:
2333 memattr = VM_MEMATTR_DEFAULT;
2337 ret = (void *)kmem_alloc_contig(kernel_map, size, M_ZERO | M_NOWAIT,
2338 lowest, highest, PAGE_SIZE, boundary, memattr);
2340 malloc_type_allocated(M_DEVBUF, round_page(size));
/*
 * Active replacement: plain pool allocation; panics (line elided shows
 * the failure path) when the caller's constraints can't be met.
 */
2345 MmAllocateContiguousMemorySpecifyCache(uint32_t size, uint64_t lowest,
2346 uint64_t highest, uint64_t boundary, enum nt_caching_type cachetype)
2350 size_t pagelength = roundup(size, PAGE_SIZE);
2352 addr = ExAllocatePoolWithTag(NonPagedPool, pagelength, 0);
2356 panic("%s", __func__);
/* Frees for the two allocators above. */
2362 MmFreeContiguousMemory(void *base)
2368 MmFreeContiguousMemorySpecifyCache(void *base, uint32_t size,
2369 enum nt_caching_type cachetype)
2371 contigfree(base, size, M_DEVBUF);
/* Bytes needed for an mdl header plus one page slot per spanned page. */
2375 MmSizeOfMdl(void *vaddr, size_t len)
2379 l = sizeof(struct mdl) +
2380 (sizeof(vm_offset_t *) * SPAN_PAGES(vaddr, len));
2386 * The Microsoft documentation says this routine fills in the
2387 * page array of an MDL with the _physical_ page addresses that
2388 * comprise the buffer, but we don't really want to do that here.
2389 * Instead, we just fill in the page array with the kernel virtual
2390 * addresses of the buffers.
2393 MmBuildMdlForNonPagedPool(mdl *m)
2395 vm_offset_t *mdl_pages;
2398 pagecnt = SPAN_PAGES(m->mdl_byteoffset, m->mdl_bytecount);
2400 if (pagecnt > (m->mdl_size - sizeof(mdl)) / sizeof(vm_offset_t *))
2401 panic("not enough pages in MDL to describe buffer");
2403 mdl_pages = MmGetMdlPfnArray(m);
/*
 * NOTE(review): mdl_pages is never advanced, so every iteration stores
 * into the same slot -- this looks like it should be mdl_pages[i] (or
 * *mdl_pages++).  It may be latent because consumers use the mapped VA
 * below rather than the array; confirm against upstream before fixing.
 */
2405 for (i = 0; i < pagecnt; i++)
2406 *mdl_pages = (vm_offset_t)m->mdl_startva + (i * PAGE_SIZE);
2408 m->mdl_flags |= MDL_SOURCE_IS_NONPAGED_POOL;
2409 m->mdl_mappedsystemva = MmGetMdlVirtualAddress(m);
/* Mapping is a no-op: kernel VA is already valid; just track the flag. */
2413 MmMapLockedPages(mdl *buf, uint8_t accessmode)
2415 buf->mdl_flags |= MDL_MAPPED_TO_SYSTEM_VA;
2416 return (MmGetMdlVirtualAddress(buf));
2420 MmMapLockedPagesSpecifyCache(mdl *buf, uint8_t accessmode, uint32_t cachetype,
2421 void *vaddr, uint32_t bugcheck, uint32_t prio)
2423 return (MmMapLockedPages(buf, accessmode));
2427 MmUnmapLockedPages(void *vaddr, mdl *buf)
2429 buf->mdl_flags &= ~MDL_MAPPED_TO_SYSTEM_VA;
2433 * This function has a problem in that it will break if you
2434 * compile this module without PAE and try to use it on a PAE
2435 * kernel. Unfortunately, there's no way around this at the
2436 * moment. It's slightly less broken than using pmap_kextract().
2437 * You'd think the virtual memory subsystem would help us out
2438 * here, but it doesn't.
2442 MmGetPhysicalAddress(void *base)
2444 return (pmap_kextract((vm_offset_t)base));
/* Resolve a Windows-exported routine name to our wrapped implementation. */
2448 MmGetSystemRoutineAddress(unicode_string *ustr)
2452 if (RtlUnicodeStringToAnsiString(&astr, ustr, TRUE))
2454 return (ndis_get_routine_address(ntoskrnl_functbl, astr.as_buf));
/* TRUE iff the kernel VA has a backing physical mapping. */
2458 MmIsAddressValid(void *vaddr)
2460 if (pmap_kextract((vm_offset_t)vaddr))
/*
 * MmMapIoSpace: find the device whose active memory resource covers
 * 'paddr' by walking every nexus's device tree, then return the kernel
 * VA corresponding to that physical address within the resource.
 */
2467 MmMapIoSpace(uint64_t paddr, uint32_t len, uint32_t cachetype)
2469 devclass_t nexus_class;
2470 device_t *nexus_devs, devp;
2471 int nexus_count = 0;
2472 device_t matching_dev = NULL;
2473 struct resource *res;
2477 /* There will always be at least one nexus. */
2479 nexus_class = devclass_find("nexus");
2480 devclass_get_devices(nexus_class, &nexus_devs, &nexus_count);
2482 for (i = 0; i < nexus_count; i++) {
2483 devp = nexus_devs[i];
2484 matching_dev = ntoskrnl_finddev(devp, paddr, &res);
2489 kfree(nexus_devs, M_TEMP);
2491 if (matching_dev == NULL)
/* Translate physical offset within the resource into its mapped VA. */
2494 v = (vm_offset_t)rman_get_virtual(res);
2495 if (paddr > rman_get_start(res))
2496 v += paddr - rman_get_start(res);
/* Nothing to undo: MmMapIoSpace never created a new mapping. */
2502 MmUnmapIoSpace(void *vaddr, size_t len)
/*
 * Recursive helper for MmMapIoSpace: search 'dev' and its children for
 * an active SYS_RES_MEMORY resource containing 'paddr'; activates the
 * resource if needed and returns the owning device (NULL if none).
 */
2508 ntoskrnl_finddev(device_t dev, uint64_t paddr, struct resource **res)
2510 device_t *children = NULL;
2511 device_t matching_dev;
2514 struct resource_list *rl;
2515 struct resource_list_entry *rle;
2519 /* We only want devices that have been successfully probed. */
2521 if (device_is_alive(dev) == FALSE)
2524 rl = BUS_GET_RESOURCE_LIST(device_get_parent(dev), dev);
2526 SLIST_FOREACH(rle, rl, link) {
2532 flags = rman_get_flags(r);
2534 if (rle->type == SYS_RES_MEMORY &&
2535 paddr >= rman_get_start(r) &&
2536 paddr <= rman_get_end(r)) {
2537 if (!(flags & RF_ACTIVE))
2538 bus_activate_resource(dev,
2539 SYS_RES_MEMORY, 0, r);
2547 * If this device has children, do another
2548 * level of recursion to inspect them.
2551 device_get_children(dev, &children, &childcnt);
2553 for (i = 0; i < childcnt; i++) {
2554 matching_dev = ntoskrnl_finddev(children[i], paddr, res);
2555 if (matching_dev != NULL) {
2556 kfree(children, M_TEMP);
2557 return (matching_dev);
2562 /* Won't somebody please think of the children! */
2564 if (children != NULL)
2565 kfree(children, M_TEMP);
2571 * Workitems are unlike DPCs, in that they run in a user-mode thread
2572 * context rather than at DISPATCH_LEVEL in kernel context. In our
2573 * case we run them in kernel context anyway.
/*
 * Per-queue worker: sleep on the queue's event, then drain the dispatch
 * list, calling each workitem outside the queue lock.  The exit check
 * (kq->kq_exit) and thread teardown are on elided lines.
 */
2576 ntoskrnl_workitem_thread(void *arg)
2585 InitializeListHead(&kq->kq_disp);
2586 kq->kq_td = curthread;
2588 KeInitializeSpinLock(&kq->kq_lock);
2589 KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE);
2592 KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL);
2594 KeAcquireSpinLock(&kq->kq_lock, &irql);
2598 KeReleaseSpinLock(&kq->kq_lock, irql);
2602 while (!IsListEmpty(&kq->kq_disp)) {
2603 l = RemoveHeadList(&kq->kq_disp);
2604 iw = CONTAINING_RECORD(l,
2605 io_workitem, iw_listentry);
2606 InitializeListHead((&iw->iw_listentry));
2607 if (iw->iw_func == NULL)
/* Drop the lock around the callback; reacquire to fetch the next item. */
2609 KeReleaseSpinLock(&kq->kq_lock, irql);
2610 MSCALL2(iw->iw_func, iw->iw_dobj, iw->iw_ctx);
2611 KeAcquireSpinLock(&kq->kq_lock, &irql);
2614 KeReleaseSpinLock(&kq->kq_lock, irql);
2619 return; /* notreached */
/*
 * RtlCharToInteger: parse an ASCII number into *val.  Skips leading
 * whitespace, honors an optional sign, and accepts 0b/0o/0x prefixes
 * when base == 0 (prefix-detection framing is on elided lines).
 * Supported bases: 2, 8, 10, 16.
 */
2623 RtlCharToInteger(const char *src, uint32_t base, uint32_t *val)
2629 return (STATUS_ACCESS_VIOLATION);
2630 while (*src != '\0' && *src <= ' ')
2634 else if (*src == '-') {
2645 } else if (*src == 'o') {
2648 } else if (*src == 'x') {
2653 } else if (!(base == 2 || base == 8 || base == 10 || base == 16))
2654 return (STATUS_INVALID_PARAMETER);
/* Accumulate digits; reject any digit >= base. */
2656 for (res = 0; *src; src++) {
2660 else if (isxdigit(*src))
2661 v = tolower(*src) - 'a' + 10;
2665 return (STATUS_INVALID_PARAMETER);
2666 res = res * base + v;
2668 *val = negative ? -res : res;
2669 return (STATUS_SUCCESS);
/*
 * Ask each workitem thread to exit (exit-flag set on elided line), kick
 * its event, and wait for the thread to disappear.
 */
2673 ntoskrnl_destroy_workitem_threads(void)
2678 for (i = 0; i < WORKITEM_THREADS; i++) {
2681 KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE);
2683 tsleep(kq->kq_td, 0, "waitiw", hz/10);
/*
 * IoAllocateWorkItem: grab a workitem from the objcache and round-robin
 * assign it to a queue (wq_idx protected by the dispatcher lock).
 */
2688 IoAllocateWorkItem(device_object *dobj)
2692 iw = objcache_get(iw_cache, M_NOWAIT);
2696 InitializeListHead(&iw->iw_listentry);
2699 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
2700 iw->iw_idx = wq_idx;
2701 WORKIDX_INC(wq_idx);
2702 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
2708 IoFreeWorkItem(io_workitem *iw)
2710 objcache_put(iw_cache, iw);
/*
 * IoQueueWorkItem: enqueue 'iw' on its assigned queue unless it is
 * already pending, then wake the worker thread.
 */
2714 IoQueueWorkItem(io_workitem *iw, io_workitem_func iw_func, uint32_t qtype,
2722 kq = wq_queues + iw->iw_idx;
2724 KeAcquireSpinLock(&kq->kq_lock, &irql);
2727 * Traverse the list and make sure this workitem hasn't
2728 * already been inserted. Queuing the same workitem
2729 * twice will hose the list but good.
2732 l = kq->kq_disp.nle_flink;
2733 while (l != &kq->kq_disp) {
2734 cur = CONTAINING_RECORD(l, io_workitem, iw_listentry);
2736 /* Already queued -- do nothing. */
2737 KeReleaseSpinLock(&kq->kq_lock, irql);
2743 iw->iw_func = iw_func;
2746 InsertTailList((&kq->kq_disp), (&iw->iw_listentry));
2747 KeReleaseSpinLock(&kq->kq_lock, irql);
2749 KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE);
/*
 * ntoskrnl_workitem: trampoline for ExQueueWorkItem() jobs -- 'dobj'
 * actually holds the legacy work_queue_item; free the io_workitem and
 * invoke the legacy callback.
 */
2753 ntoskrnl_workitem(device_object *dobj, void *arg)
2760 w = (work_queue_item *)dobj;
2761 f = (work_item_func)w->wqi_func;
2762 objcache_put(iw_cache, iw);
2763 MSCALL2(f, w, w->wqi_ctx);
2767 * The ExQueueWorkItem() API is deprecated in Windows XP. Microsoft
2768 * warns that it's unsafe and to use IoQueueWorkItem() instead. The
2769 * problem with ExQueueWorkItem() is that it can't guard against
2770 * the condition where a driver submits a job to the work queue and
2771 * is then unloaded before the job is able to run. IoQueueWorkItem()
2772 * acquires a reference to the device's device_object via the
2773 * object manager and retains it until after the job has completed,
2774 * which prevents the driver from being unloaded before the job
2775 * runs. (We don't currently support this behavior, though hopefully
2776 * that will change once the object manager API is fleshed out a bit.)
2778 * Having said all that, the ExQueueWorkItem() API remains, because
2779 * there are still other parts of Windows that use it, including
2780 * NDIS itself: NdisScheduleWorkItem() calls ExQueueWorkItem().
2781 * We fake up the ExQueueWorkItem() API on top of our implementation
2782 * of IoQueueWorkItem(). Workitem thread #3 is reserved exclusively
2783 * for ExQueueWorkItem() jobs, and we pass a pointer to the work
2784 * queue item (provided by the caller) in to IoAllocateWorkItem()
2785 * instead of the device_object. We need to save this pointer so
2786 * we can apply a sanity check: as with the DPC queue and other
2787 * workitem queues, we can't allow the same work queue item to
2788 * be queued twice. If it's already pending, we silently return
2792 ExQueueWorkItem(work_queue_item *w, uint32_t qtype)
2795 io_workitem_func iwf;
2803 * We need to do a special sanity test to make sure
2804 * the ExQueueWorkItem() API isn't used to queue
2805 * the same workitem twice. Rather than checking the
2806 * io_workitem pointer itself, we test the attached
2807 * device object, which is really a pointer to the
2808 * legacy work queue item structure.
2811 kq = wq_queues + WORKITEM_LEGACY_THREAD;
2812 KeAcquireSpinLock(&kq->kq_lock, &irql);
2813 l = kq->kq_disp.nle_flink;
2814 while (l != &kq->kq_disp) {
2815 cur = CONTAINING_RECORD(l, io_workitem, iw_listentry);
2816 if (cur->iw_dobj == (device_object *)w) {
2817 /* Already queued -- do nothing. */
2818 KeReleaseSpinLock(&kq->kq_lock, irql);
2823 KeReleaseSpinLock(&kq->kq_lock, irql);
/* Route through the legacy queue via the ntoskrnl_workitem trampoline. */
2825 iw = IoAllocateWorkItem((device_object *)w);
2829 iw->iw_idx = WORKITEM_LEGACY_THREAD;
2830 iwf = (io_workitem_func)ntoskrnl_findwrap((funcptr)ntoskrnl_workitem);
2831 IoQueueWorkItem(iw, iwf, qtype, iw);
/*
 * Rtl memory shims: straight wrappers over the native memory routines.
 * RtlZeroMemory's bzero body is on an elided line.
 */
2835 RtlZeroMemory(void *dst, size_t len)
2841 RtlSecureZeroMemory(void *dst, size_t len)
2843 memset(dst, 0, len);
2847 RtlFillMemory(void *dst, size_t len, uint8_t c)
2849 memset(dst, c, len);
2853 RtlMoveMemory(void *dst, const void *src, size_t len)
2855 memmove(dst, src, len);
2859 RtlCopyMemory(void *dst, const void *src, size_t len)
2861 bcopy(src, dst, len);
/* Length of the common prefix of s1 and s2 (return of i elided). */
2865 RtlCompareMemory(const void *s1, const void *s2, size_t len)
2870 m1 = __DECONST(char *, s1);
2871 m2 = __DECONST(char *, s2);
2873 for (i = 0; i < len && m1[i] == m2[i]; i++);
/*
 * RtlInitAnsiString: point dst at the NUL-terminated 'src' without
 * copying; NULL src yields a zero-length string.
 */
2878 RtlInitAnsiString(ansi_string *dst, char *src)
2886 a->as_len = a->as_maxlen = 0;
2890 a->as_len = a->as_maxlen = strlen(src);
/* Same, for a NUL-terminated UTF-16 source (length counted in bytes). */
2895 RtlInitUnicodeString(unicode_string *dst, uint16_t *src)
2904 u->us_len = u->us_maxlen = 0;
2911 u->us_len = u->us_maxlen = i * 2;
/*
 * RtlUnicodeStringToInteger: narrow the UTF-16 digits to ASCII and
 * delegate to strtoul; handles sign and 0b/0o/0x prefixes when base==0.
 */
2916 RtlUnicodeStringToInteger(unicode_string *ustr, uint32_t base, uint32_t *val)
2923 uchr = ustr->us_buf;
2925 bzero(abuf, sizeof(abuf));
2927 if ((char)((*uchr) & 0xFF) == '-') {
2931 } else if ((char)((*uchr) & 0xFF) == '+') {
2938 if ((char)((*uchr) & 0xFF) == 'b') {
2942 } else if ((char)((*uchr) & 0xFF) == 'o') {
2946 } else if ((char)((*uchr) & 0xFF) == 'x') {
2960 ntoskrnl_unicode_to_ascii(uchr, astr, len);
2961 *val = strtoul(abuf, NULL, base);
2963 return (STATUS_SUCCESS);
/* Release buffers allocated by the Rtl conversion routines. */
2967 RtlFreeUnicodeString(unicode_string *ustr)
2969 if (ustr->us_buf == NULL)
2971 ExFreePool(ustr->us_buf);
2972 ustr->us_buf = NULL;
2976 RtlFreeAnsiString(ansi_string *astr)
2978 if (astr->as_buf == NULL)
2980 ExFreePool(astr->as_buf);
2981 astr->as_buf = NULL;
/* atoi()/atol(): minimal libc shims built on strtol(). */
2985 atoi(const char *str)
2987 return (int)strtol(str, NULL, 10);
2991 atol(const char *str)
2993 return strtol(str, NULL, 10);
/* rand(): seed from the microsecond clock, then draw from krandom(). */
3002 skrandom(tv.tv_usec);
3003 return ((int)krandom());
/* srand(): NOTE(review): the seed argument appears unused -- the body
 * is not visible here; verify against the full source. */
3007 srand(unsigned int seed)
/*
 * IoIsWdmVersionAvailable(): we advertise exactly the WinXP WDM
 * version; anything else is reported unavailable.
 */
3013 IoIsWdmVersionAvailable(uint8_t major, uint8_t minor)
3015 if (major == WDM_MAJOR && minor == WDM_MINOR_WINXP)
/* IoOpenDeviceRegistryKey(): unimplemented stub. */
3021 IoOpenDeviceRegistryKey(struct device_object *devobj, uint32_t type,
3022 uint32_t mask, void **key)
3024 return (NDIS_STATUS_INVALID_DEVICE_REQUEST);
/* IoGetDeviceObjectPointer(): stub that always reports success. */
3028 IoGetDeviceObjectPointer(unicode_string *name, uint32_t reqaccess,
3029 void *fileobj, device_object *devobj)
3031 return (STATUS_SUCCESS);
/*
 * IoGetDeviceProperty(): only DEVPROP_DRIVER_KEYNAME is supported;
 * it hands back the driver name stored in the driver object.
 */
3035 IoGetDeviceProperty(device_object *devobj, uint32_t regprop, uint32_t buflen,
3036 void *prop, uint32_t *reslen)
3041 drv = devobj->do_drvobj;
3044 case DEVPROP_DRIVER_KEYNAME:
3046 *name = drv->dro_drivername.us_buf;
3047 *reslen = drv->dro_drivername.us_len;
/* Any other property is rejected. */
3050 return (STATUS_INVALID_PARAMETER_2);
3054 return (STATUS_SUCCESS);
/*
 * KeInitializeMutex(): set up a kmutant dispatcher object in the
 * signalled (free) state with no owning thread.
 */
3058 KeInitializeMutex(kmutant *kmutex, uint32_t level)
3060 InitializeListHead((&kmutex->km_header.dh_waitlisthead));
3061 kmutex->km_abandoned = FALSE;
3062 kmutex->km_apcdisable = 1;
/* dh_sigstate == 1 means "unowned / available". */
3063 kmutex->km_header.dh_sigstate = 1;
3064 kmutex->km_header.dh_type = DISP_TYPE_MUTANT;
3065 kmutex->km_header.dh_size = sizeof(kmutant) / sizeof(uint32_t);
3066 kmutex->km_ownerthread = NULL;
/*
 * KeReleaseMutex(): release the mutant.  Only the owning thread may
 * release; when the signal state climbs back to 1 the mutant becomes
 * free and waiters are re-tested under the dispatcher lock.
 */
3070 KeReleaseMutex(kmutant *kmutex, uint8_t kwait)
3074 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3075 prevstate = kmutex->km_header.dh_sigstate;
3076 if (kmutex->km_ownerthread != curthread) {
3077 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
3078 return (STATUS_MUTANT_NOT_OWNED);
3081 kmutex->km_header.dh_sigstate++;
3082 kmutex->km_abandoned = FALSE;
3084 if (kmutex->km_header.dh_sigstate == 1) {
3085 kmutex->km_ownerthread = NULL;
3086 ntoskrnl_waittest(&kmutex->km_header, IO_NO_INCREMENT);
3089 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/* KeReadStateMutex(): non-blocking peek at the signal state. */
3095 KeReadStateMutex(kmutant *kmutex)
3097 return (kmutex->km_header.dh_sigstate);
/*
 * KeInitializeEvent(): initialize a kernel event object.  Notification
 * events stay signalled until explicitly reset; synchronization events
 * are auto-cleared when a waiter is released.
 */
3101 KeInitializeEvent(nt_kevent *kevent, uint32_t type, uint8_t state)
3103 InitializeListHead((&kevent->k_header.dh_waitlisthead));
3104 kevent->k_header.dh_sigstate = state;
3105 if (type == EVENT_TYPE_NOTIFY)
3106 kevent->k_header.dh_type = DISP_TYPE_NOTIFICATION_EVENT;
3108 kevent->k_header.dh_type = DISP_TYPE_SYNCHRONIZATION_EVENT;
3109 kevent->k_header.dh_size = sizeof(nt_kevent) / sizeof(uint32_t);
/* KeResetEvent(): clear the event; returns its previous state. */
3113 KeResetEvent(nt_kevent *kevent)
3117 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3118 prevstate = kevent->k_header.dh_sigstate;
3119 kevent->k_header.dh_sigstate = FALSE;
3120 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/*
 * KeSetEvent(): signal an event, waking waiters according to the
 * event type; returns the previous signal state.
 */
3126 KeSetEvent(nt_kevent *kevent, uint32_t increment, uint8_t kwait)
3130 nt_dispatch_header *dh;
3133 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3134 prevstate = kevent->k_header.dh_sigstate;
3135 dh = &kevent->k_header;
3137 if (IsListEmpty(&dh->dh_waitlisthead))
3139 * If there's nobody in the waitlist, just set
3140 * the state to signalled.
3142 dh->dh_sigstate = 1;
3145 * Get the first waiter. If this is a synchronization
3146 * event, just wake up that one thread (don't bother
3147 * setting the state to signalled since we're supposed
3148 * to automatically clear synchronization events anyway).
3150 * If it's a notification event, or the first
3151 * waiter is doing a WAITTYPE_ALL wait, go through
3152 * the full wait satisfaction process.
3154 w = CONTAINING_RECORD(dh->dh_waitlisthead.nle_flink,
3155 wait_block, wb_waitlist);
3157 if (kevent->k_header.dh_type == DISP_TYPE_NOTIFICATION_EVENT ||
3158 w->wb_waittype == WAITTYPE_ALL) {
3159 if (prevstate == 0) {
3160 dh->dh_sigstate = 1;
3161 ntoskrnl_waittest(dh, increment);
/* Sync event fast path: wake just the first waiter, boosting its
 * priority by the requested increment (clamped to PRI_MIN_KERN). */
3164 w->wb_awakened |= TRUE;
3165 cv_broadcastpri(&we->we_cv,
3166 (w->wb_oldpri - (increment * 4)) > PRI_MIN_KERN ?
3167 w->wb_oldpri - (increment * 4) : PRI_MIN_KERN);
3171 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/* KeClearEvent(): clear without reporting the previous state. */
3177 KeClearEvent(nt_kevent *kevent)
3179 kevent->k_header.dh_sigstate = FALSE;
/* KeReadStateEvent(): non-blocking peek at the signal state. */
3183 KeReadStateEvent(nt_kevent *kevent)
3185 return (kevent->k_header.dh_sigstate);
3189 * The object manager in Windows is responsible for managing
3190 * references and access to various types of objects, including
3191 * device_objects, events, threads, timers and so on. However,
3192 * there's a difference in the way objects are handled in user
3193 * mode versus kernel mode.
3195 * In user mode (i.e. Win32 applications), all objects are
3196 * managed by the object manager. For example, when you create
3197 * a timer or event object, you actually end up with an
3198 * object_header (for the object manager's bookkeeping
3199 * purposes) and an object body (which contains the actual object
3200 * structure, e.g. ktimer, kevent, etc...). This allows Windows
3201 * to manage resource quotas and to enforce access restrictions
3202 * on basically every kind of system object handled by the kernel.
3204 * However, in kernel mode, you only end up using the object
3205 * manager some of the time. For example, in a driver, you create
3206 * a timer object by simply allocating the memory for a ktimer
3207 * structure and initializing it with KeInitializeTimer(). Hence,
3208 * the timer has no object_header and no reference counting or
3209 * security/resource checks are done on it. The assumption in
3210 * this case is that if you're running in kernel mode, you know
3211 * what you're doing, and you're already at an elevated privilege
3214 * There are some exceptions to this. The two most important ones
3215 * for our purposes are device_objects and threads. We need to use
3216 * the object manager to do reference counting on device_objects,
3217 * and for threads, you can only get a pointer to a thread's
3218 * dispatch header by using ObReferenceObjectByHandle() on the
3219 * handle returned by PsCreateSystemThread().
/*
 * ObReferenceObjectByHandle(): fake object-manager reference.  We
 * allocate an nt_objref with a thread-style dispatch header so the
 * handle can be waited on (see PsTerminateSystemThread()).
 */
3223 ObReferenceObjectByHandle(ndis_handle handle, uint32_t reqaccess, void *otype,
3224 uint8_t accessmode, void **object, void **handleinfo)
3228 nr = kmalloc(sizeof(nt_objref), M_DEVBUF, M_NOWAIT|M_ZERO);
3230 return (STATUS_INSUFFICIENT_RESOURCES);
3232 InitializeListHead((&nr->no_dh.dh_waitlisthead));
3233 nr->no_obj = handle;
3234 nr->no_dh.dh_type = DISP_TYPE_THREAD;
3235 nr->no_dh.dh_sigstate = 0;
3236 nr->no_dh.dh_size = (uint8_t)(sizeof(struct thread) /
/* Track the reference so thread exit can find and signal it. */
3238 TAILQ_INSERT_TAIL(&ntoskrnl_reflist, nr, link);
3241 return (STATUS_SUCCESS);
/* ObfDereferenceObject(): drop a reference created above. */
3245 ObfDereferenceObject(void *object)
3250 TAILQ_REMOVE(&ntoskrnl_reflist, nr, link);
3251 kfree(nr, M_DEVBUF);
/* ZwClose(): stub; nothing to do for our fake handles. */
3255 ZwClose(ndis_handle handle)
3257 return (STATUS_SUCCESS);
/* WMI tracing stubs: we record nothing and report benign status. */
3261 WmiQueryTraceInformation(uint32_t traceclass, void *traceinfo,
3262 uint32_t infolen, uint32_t reqlen, void *buf)
3264 return (STATUS_NOT_FOUND);
3268 WmiTraceMessage(uint64_t loghandle, uint32_t messageflags,
3269 void *guid, uint16_t messagenum, ...)
3271 return (STATUS_SUCCESS);
3275 IoWMIRegistrationControl(device_object *dobj, uint32_t action)
3277 return (STATUS_SUCCESS);
3281 * This is here just in case the thread returns without calling
3282 * PsTerminateSystemThread().
/*
 * ntoskrnl_thrfunc(): native-side trampoline for threads created by
 * PsCreateSystemThread().  Unpacks the context, invokes the Windows
 * thread routine via MSCALL1(), then terminates the thread with that
 * routine's return value.
 */
3285 ntoskrnl_thrfunc(void *arg)
3287 thread_context *thrctx;
3288 uint32_t (*tfunc)(void *);
3293 tfunc = thrctx->tc_thrfunc;
3294 tctx = thrctx->tc_thrctx;
/* The context was allocated by PsCreateSystemThread(); free it here. */
3295 kfree(thrctx, M_TEMP);
3297 rval = MSCALL1(tfunc, tctx);
3299 PsTerminateSystemThread(rval);
3300 return; /* notreached */
/*
 * PsCreateSystemThread(): spawn a kernel thread running the supplied
 * Windows function.  Most of the Windows-specific arguments are
 * ignored by this emulation.
 */
3304 PsCreateSystemThread(ndis_handle *handle, uint32_t reqaccess, void *objattrs,
3305 ndis_handle phandle, void *clientid, void *thrfunc, void *thrctx)
3311 tc = kmalloc(sizeof(thread_context), M_TEMP, M_NOWAIT);
3313 return (STATUS_INSUFFICIENT_RESOURCES);
3315 tc->tc_thrctx = thrctx;
3316 tc->tc_thrfunc = thrfunc;
3318 error = kthread_create(ntoskrnl_thrfunc, tc, &p, "Win kthread %d",
3323 return (STATUS_INSUFFICIENT_RESOURCES);
3329 return (STATUS_SUCCESS);
3333 * In Windows, the exit of a thread is an event that you're allowed
3334 * to wait on, assuming you've obtained a reference to the thread using
3335 * ObReferenceObjectByHandle(). Unfortunately, the only way we can
3336 * simulate this behavior is to register each thread we create in a
3337 * reference list, and if someone holds a reference to us, we poke
/*
 * PsTerminateSystemThread(): signal every objref tracking the current
 * thread so waiters are released, then exit.
 */
3341 PsTerminateSystemThread(ndis_status status)
3343 struct nt_objref *nr;
3345 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3346 TAILQ_FOREACH(nr, &ntoskrnl_reflist, link) {
3347 if (nr->no_obj != curthread->td_proc)
3349 nr->no_dh.dh_sigstate = 1;
3350 ntoskrnl_waittest(&nr->no_dh, IO_NO_INCREMENT);
3353 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
3359 return (0); /* notreached */
/* DbgPrint(): route Windows debug output to the kernel console. */
3363 DbgPrint(char *fmt, ...)
3368 __va_start(ap, fmt);
3373 return (STATUS_SUCCESS);
/* DbgBreakPoint(): drop into the in-kernel debugger. */
3380 Debugger("DbgBreakPoint(): breakpoint");
/* KeBugCheckEx(): a Windows BSOD maps to a kernel panic. */
3384 KeBugCheckEx(uint32_t code, u_long param1, u_long param2, u_long param3,
3387 panic("KeBugCheckEx: STOP 0x%X", code);
/*
 * ntoskrnl_timercall(): callout handler for Windows ktimers.  Under
 * the dispatcher lock it signals the timer object, re-arms periodic
 * timers, and finally queues the timer's associated DPC (if any).
 */
3391 ntoskrnl_timercall(void *arg)
3397 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3401 #ifdef NTOSKRNL_DEBUG_TIMERS
3402 ntoskrnl_timer_fires++;
3404 ntoskrnl_remove_timer(timer);
3407 * This should never happen, but complain
3411 if (timer->k_header.dh_inserted == FALSE) {
3412 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
3413 kprintf("NTOS: timer %p fired even though "
3414 "it was canceled\n", timer);
3418 /* Mark the timer as no longer being on the timer queue. */
3420 timer->k_header.dh_inserted = FALSE;
3422 /* Now signal the object and satisfy any waits on it. */
3424 timer->k_header.dh_sigstate = 1;
3425 ntoskrnl_waittest(&timer->k_header, IO_NO_INCREMENT);
3428 * If this is a periodic timer, re-arm it
3429 * so it will fire again. We do this before
3430 * calling any deferred procedure calls because
3431 * it's possible the DPC might cancel the timer,
3432 * in which case it would be wrong for us to
3433 * re-arm it again afterwards.
3436 if (timer->k_period) {
3438 tv.tv_usec = timer->k_period * 1000;
3439 timer->k_header.dh_inserted = TRUE;
3440 ntoskrnl_insert_timer(timer, tvtohz_high(&tv));
3441 #ifdef NTOSKRNL_DEBUG_TIMERS
3442 ntoskrnl_timer_reloads++;
3448 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
3450 /* If there's a DPC associated with the timer, queue it up. */
3453 KeInsertQueueDpc(dpc, NULL, NULL);
3456 #ifdef NTOSKRNL_DEBUG_TIMERS
/* sysctl handler that dumps the timer statistics below. */
3458 sysctl_show_timers(SYSCTL_HANDLER_ARGS)
3463 ntoskrnl_show_timers();
3464 return (sysctl_handle_int(oidp, &ret, 0, req));
/*
 * ntoskrnl_show_timers(): count free callout entries under the
 * spinlock, then print the debug counters.
 */
3468 ntoskrnl_show_timers(void)
3473 mtx_spinlock(&ntoskrnl_calllock);
3474 l = ntoskrnl_calllist.nle_flink;
3475 while(l != &ntoskrnl_calllist) {
3479 mtx_spinunlock(&ntoskrnl_calllock);
3482 kprintf("%d timers available (out of %d)\n", i, NTOSKRNL_TIMEOUTS);
3483 kprintf("timer sets: %qu\n", ntoskrnl_timer_sets);
3484 kprintf("timer reloads: %qu\n", ntoskrnl_timer_reloads);
3485 kprintf("timer cancels: %qu\n", ntoskrnl_timer_cancels);
3486 kprintf("timer fires: %qu\n", ntoskrnl_timer_fires);
3492 * Must be called with dispatcher lock held.
/*
 * ntoskrnl_insert_timer(): take a callout entry off the free list
 * (panic if the pool is exhausted) and arm it for this timer.
 */
3496 ntoskrnl_insert_timer(ktimer *timer, int ticks)
3503 * Try and allocate a timer.
3505 mtx_spinlock(&ntoskrnl_calllock);
3506 if (IsListEmpty(&ntoskrnl_calllist)) {
3507 mtx_spinunlock(&ntoskrnl_calllock);
3508 #ifdef NTOSKRNL_DEBUG_TIMERS
3509 ntoskrnl_show_timers();
3511 panic("out of timers!");
3513 l = RemoveHeadList(&ntoskrnl_calllist);
3514 mtx_spinunlock(&ntoskrnl_calllock);
3516 e = CONTAINING_RECORD(l, callout_entry, ce_list);
3519 timer->k_callout = c;
3522 callout_reset(c, ticks, ntoskrnl_timercall, timer);
/*
 * ntoskrnl_remove_timer(): stop the callout and return its entry to
 * the free list.
 */
3526 ntoskrnl_remove_timer(ktimer *timer)
3530 e = (callout_entry *)timer->k_callout;
3531 callout_stop(timer->k_callout);
3533 mtx_spinlock(&ntoskrnl_calllock);
3534 InsertHeadList((&ntoskrnl_calllist), (&e->ce_list));
3535 mtx_spinunlock(&ntoskrnl_calllock);
/* KeInitializeTimer(): default timers are notification-style. */
3539 KeInitializeTimer(ktimer *timer)
3544 KeInitializeTimerEx(timer, EVENT_TYPE_NOTIFY);
/*
 * KeInitializeTimerEx(): zero the ktimer and set up its dispatch
 * header according to the requested event type.
 */
3548 KeInitializeTimerEx(ktimer *timer, uint32_t type)
3553 bzero((char *)timer, sizeof(ktimer));
3554 InitializeListHead((&timer->k_header.dh_waitlisthead));
3555 timer->k_header.dh_sigstate = FALSE;
3556 timer->k_header.dh_inserted = FALSE;
3557 if (type == EVENT_TYPE_NOTIFY)
3558 timer->k_header.dh_type = DISP_TYPE_NOTIFICATION_TIMER;
3560 timer->k_header.dh_type = DISP_TYPE_SYNCHRONIZATION_TIMER;
3561 timer->k_header.dh_size = sizeof(ktimer) / sizeof(uint32_t);
3565 * DPC subsystem. A Windows Deferred Procedure Call has the following
3567 * - It runs at DISPATCH_LEVEL.
3568 * - It can have one of 3 importance values that control when it
3569 * runs relative to other DPCs in the queue.
3570 * - On SMP systems, it can be set to run on a specific processor.
3571 * In order to satisfy the last property, we create a DPC thread for
3572 * each CPU in the system and bind it to that CPU. Each thread
3573 * maintains three queues with different importance levels, which
3574 * will be processed in order from lowest to highest.
3576 * In Windows, interrupt handlers run as DPCs. (Not to be confused
3577 * with ISRs, which run in interrupt context and can preempt DPCs.)
3578 * ISRs are given the highest importance so that they'll take
3579 * precedence over timers and other things.
/*
 * ntoskrnl_dpc_thread(): per-queue DPC worker.  Initializes its queue
 * state, raises its scheduling priority, then loops: wait on kq_proc,
 * drain the dispatch list (dropping the lock around each callback),
 * and signal kq_done for KeFlushQueuedDpcs().
 */
3583 ntoskrnl_dpc_thread(void *arg)
3592 InitializeListHead(&kq->kq_disp);
3593 kq->kq_td = curthread;
3595 kq->kq_running = FALSE;
3596 KeInitializeSpinLock(&kq->kq_lock);
3597 KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE);
3598 KeInitializeEvent(&kq->kq_done, EVENT_TYPE_SYNC, FALSE);
3601 * Elevate our priority. DPCs are used to run interrupt
3602 * handlers, and they should trigger as soon as possible
3603 * once scheduled by an ISR.
3606 #ifdef NTOSKRNL_MULTIPLE_DPCS
3607 sched_bind(curthread, kq->kq_cpu);
3609 lwkt_setpri_self(TDPRI_INT_HIGH);
3612 KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL);
3614 KeAcquireSpinLock(&kq->kq_lock, &irql);
3618 KeReleaseSpinLock(&kq->kq_lock, irql);
3622 kq->kq_running = TRUE;
3624 while (!IsListEmpty(&kq->kq_disp)) {
3625 l = RemoveHeadList((&kq->kq_disp));
3626 d = CONTAINING_RECORD(l, kdpc, k_dpclistentry);
3627 InitializeListHead((&d->k_dpclistentry));
/* Drop the queue lock while running the deferred function. */
3628 KeReleaseSpinLockFromDpcLevel(&kq->kq_lock);
3629 MSCALL4(d->k_deferedfunc, d, d->k_deferredctx,
3630 d->k_sysarg1, d->k_sysarg2);
3631 KeAcquireSpinLockAtDpcLevel(&kq->kq_lock);
3634 kq->kq_running = FALSE;
3636 KeReleaseSpinLock(&kq->kq_lock, irql);
3638 KeSetEvent(&kq->kq_done, IO_NO_INCREMENT, FALSE);
3643 return; /* notreached */
/*
 * ntoskrnl_destroy_dpc_threads(): queue a NULL-function DPC to each
 * queue and wait briefly for its worker thread to go away.
 * NOTE(review): the worker's exit handshake is not visible in this
 * excerpt -- confirm against the full source.
 */
3647 ntoskrnl_destroy_dpc_threads(void)
3654 #ifdef NTOSKRNL_MULTIPLE_DPCS
3655 for (i = 0; i < ncpus; i++) {
3657 for (i = 0; i < 1; i++) {
3662 KeInitializeDpc(&dpc, NULL, NULL);
3663 KeSetTargetProcessorDpc(&dpc, i);
3664 KeInsertQueueDpc(&dpc, NULL, NULL);
3666 tsleep(kq->kq_td, 0, "dpcw", hz/10);
/*
 * ntoskrnl_insert_dpc(): place a DPC on a dispatch list unless it is
 * already present; low-importance DPCs go to the tail, everything
 * else to the head.
 */
3671 ntoskrnl_insert_dpc(list_entry *head, kdpc *dpc)
3676 l = head->nle_flink;
3678 d = CONTAINING_RECORD(l, kdpc, k_dpclistentry);
3684 if (dpc->k_importance == KDPC_IMPORTANCE_LOW)
3685 InsertTailList((head), (&dpc->k_dpclistentry));
3687 InsertHeadList((head), (&dpc->k_dpclistentry));
/*
 * KeInitializeDpc(): record the callback and context; defaults are
 * "any CPU" and medium importance.
 */
3693 KeInitializeDpc(kdpc *dpc, void *dpcfunc, void *dpcctx)
3699 dpc->k_deferedfunc = dpcfunc;
3700 dpc->k_deferredctx = dpcctx;
3701 dpc->k_num = KDPC_CPU_DEFAULT;
3702 dpc->k_importance = KDPC_IMPORTANCE_MEDIUM;
3703 InitializeListHead((&dpc->k_dpclistentry));
/*
 * KeInsertQueueDpc(): queue a DPC and wake its worker thread.  With
 * NTOSKRNL_MULTIPLE_DPCS the target queue is per-CPU.
 */
3707 KeInsertQueueDpc(kdpc *dpc, void *sysarg1, void *sysarg2)
3718 #ifdef NTOSKRNL_MULTIPLE_DPCS
3719 KeRaiseIrql(DISPATCH_LEVEL, &irql);
3722 * By default, the DPC is queued to run on the same CPU
3723 * that scheduled it.
3726 if (dpc->k_num == KDPC_CPU_DEFAULT)
3727 kq += curthread->td_oncpu;
3730 KeAcquireSpinLockAtDpcLevel(&kq->kq_lock);
3732 KeAcquireSpinLock(&kq->kq_lock, &irql);
3735 r = ntoskrnl_insert_dpc(&kq->kq_disp, dpc);
3737 dpc->k_sysarg1 = sysarg1;
3738 dpc->k_sysarg2 = sysarg2;
3740 KeReleaseSpinLock(&kq->kq_lock, irql);
3745 KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE);
/*
 * KeRemoveQueueDpc(): unlink a DPC if it is currently queued; a
 * self-pointing list entry means it was not queued.
 */
3751 KeRemoveQueueDpc(kdpc *dpc)
3759 #ifdef NTOSKRNL_MULTIPLE_DPCS
3760 KeRaiseIrql(DISPATCH_LEVEL, &irql);
3762 kq = kq_queues + dpc->k_num;
3764 KeAcquireSpinLockAtDpcLevel(&kq->kq_lock);
3767 KeAcquireSpinLock(&kq->kq_lock, &irql);
3770 if (dpc->k_dpclistentry.nle_flink == &dpc->k_dpclistentry) {
3771 KeReleaseSpinLockFromDpcLevel(&kq->kq_lock);
3776 RemoveEntryList((&dpc->k_dpclistentry));
3777 InitializeListHead((&dpc->k_dpclistentry));
3779 KeReleaseSpinLock(&kq->kq_lock, irql);
/* KeSetImportanceDpc(): accept only the three defined levels. */
3785 KeSetImportanceDpc(kdpc *dpc, uint32_t imp)
3787 if (imp != KDPC_IMPORTANCE_LOW &&
3788 imp != KDPC_IMPORTANCE_MEDIUM &&
3789 imp != KDPC_IMPORTANCE_HIGH)
3792 dpc->k_importance = (uint8_t)imp;
/* KeSetTargetProcessorDpc(): bind a DPC to a specific CPU. */
3796 KeSetTargetProcessorDpc(kdpc *dpc, uint8_t cpu)
/*
 * KeFlushQueuedDpcs(): wake every DPC queue and wait for each one to
 * signal completion, draining all pending DPCs.
 */
3805 KeFlushQueuedDpcs(void)
3811 * Poke each DPC queue and wait
3812 * for them to drain.
3815 #ifdef NTOSKRNL_MULTIPLE_DPCS
3816 for (i = 0; i < ncpus; i++) {
3818 for (i = 0; i < 1; i++) {
3821 KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE);
3822 KeWaitForSingleObject(&kq->kq_done, 0, 0, TRUE, NULL);
/* KeGetCurrentProcessorNumber(): id of the CPU we are running on. */
3827 KeGetCurrentProcessorNumber(void)
3829 return (curthread->td_gd->gd_cpuid);
/*
 * KeSetTimerEx(): (re)arm a timer.  A negative duetime is relative,
 * in 100ns units; a positive duetime is an absolute system time that
 * is converted to a delta against the current time.
 */
3833 KeSetTimerEx(ktimer *timer, int64_t duetime, uint32_t period, kdpc *dpc)
3842 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
/* Cancel any pending instance before re-arming. */
3844 if (timer->k_header.dh_inserted == TRUE) {
3845 ntoskrnl_remove_timer(timer);
3846 #ifdef NTOSKRNL_DEBUG_TIMERS
3847 ntoskrnl_timer_cancels++;
3849 timer->k_header.dh_inserted = FALSE;
3854 timer->k_duetime = duetime;
3855 timer->k_period = period;
3856 timer->k_header.dh_sigstate = FALSE;
/* Relative duetime: negate and split into seconds/microseconds. */
3860 tv.tv_sec = - (duetime) / 10000000;
3861 tv.tv_usec = (- (duetime) / 10) -
3862 (tv.tv_sec * 1000000);
/* Absolute duetime: fire immediately if already in the past. */
3864 ntoskrnl_time(&curtime);
3865 if (duetime < curtime)
3866 tv.tv_sec = tv.tv_usec = 0;
3868 tv.tv_sec = ((duetime) - curtime) / 10000000;
3869 tv.tv_usec = ((duetime) - curtime) / 10 -
3870 (tv.tv_sec * 1000000);
3874 timer->k_header.dh_inserted = TRUE;
3875 ntoskrnl_insert_timer(timer, tvtohz_high(&tv));
3876 #ifdef NTOSKRNL_DEBUG_TIMERS
3877 ntoskrnl_timer_sets++;
3880 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/* KeSetTimer(): one-shot convenience wrapper (period 0). */
3886 KeSetTimer(ktimer *timer, int64_t duetime, kdpc *dpc)
3888 return (KeSetTimerEx(timer, duetime, 0, dpc));
3892 * The Windows DDK documentation seems to say that cancelling
3893 * a timer that has a DPC will result in the DPC also being
3894 * cancelled, but this isn't really the case.
/* KeCancelTimer(): dequeue a pending timer; returns whether it was
 * still pending at the time of the call. */
3898 KeCancelTimer(ktimer *timer)
3905 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3907 pending = timer->k_header.dh_inserted;
3909 if (timer->k_header.dh_inserted == TRUE) {
3910 timer->k_header.dh_inserted = FALSE;
3911 ntoskrnl_remove_timer(timer);
3912 #ifdef NTOSKRNL_DEBUG_TIMERS
3913 ntoskrnl_timer_cancels++;
3917 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/* KeReadStateTimer(): non-blocking peek at the signal state. */
3923 KeReadStateTimer(ktimer *timer)
3925 return (timer->k_header.dh_sigstate);
/*
 * KeDelayExecutionThread(): sleep by arming a throwaway timer and
 * waiting on it; only kernel-mode waits are supported here.
 */
3929 KeDelayExecutionThread(uint8_t wait_mode, uint8_t alertable, int64_t *interval)
3934 panic("invalid wait_mode %d", wait_mode);
3936 KeInitializeTimer(&timer);
3937 KeSetTimer(&timer, *interval, NULL);
3938 KeWaitForSingleObject(&timer, 0, 0, alertable, NULL);
3940 return STATUS_SUCCESS;
/*
 * KeQueryInterruptTime(): system uptime expressed in 100ns units,
 * derived from the tick counter.
 */
3944 KeQueryInterruptTime(void)
3949 getmicrouptime(&tv);
3951 ticks = tvtohz_high(&tv);
3953 return ticks * ((10000000 + hz - 1) / hz);
/* KeGetCurrentThread(): the Windows "thread" is our struct thread. */
3956 static struct thread *
3957 KeGetCurrentThread(void)
/*
 * KeSetPriorityThread(): map the three Windows priority levels onto
 * LWKT priorities and return the previous (mapped) priority.
 */
3964 KeSetPriorityThread(struct thread *td, int32_t pri)
3969 return LOW_REALTIME_PRIORITY;
3971 if (td->td_pri >= TDPRI_INT_HIGH)
3972 old = HIGH_PRIORITY;
3973 else if (td->td_pri <= TDPRI_IDLE_WORK)
3976 old = LOW_REALTIME_PRIORITY;
3978 if (pri == HIGH_PRIORITY)
3979 lwkt_setpri(td, TDPRI_INT_HIGH);
3980 if (pri == LOW_REALTIME_PRIORITY)
3981 lwkt_setpri(td, TDPRI_SOFT_TIMER);
3982 if (pri == LOW_PRIORITY)
3983 lwkt_setpri(td, TDPRI_IDLE_WORK);
/* dummy(): catch-all for unimplemented imports; just logs a call. */
3991 kprintf("ntoskrnl dummy called...\n");
/*
 * Master import-dispatch table: maps Windows kernel import names to
 * our implementations, with argument counts for the calling-convention
 * wrappers (SFUNC = stdcall, CFUNC = cdecl, FFUNC = fastcall,
 * RFUNC = register; *_MAP entries alias a Windows name to a native
 * function).
 */
3995 image_patch_table ntoskrnl_functbl[] = {
3996 IMPORT_SFUNC(RtlZeroMemory, 2),
3997 IMPORT_SFUNC(RtlSecureZeroMemory, 2),
3998 IMPORT_SFUNC(RtlFillMemory, 3),
3999 IMPORT_SFUNC(RtlMoveMemory, 3),
4000 IMPORT_SFUNC(RtlCharToInteger, 3),
4001 IMPORT_SFUNC(RtlCopyMemory, 3),
4002 IMPORT_SFUNC(RtlCopyString, 2),
4003 IMPORT_SFUNC(RtlCompareMemory, 3),
4004 IMPORT_SFUNC(RtlEqualUnicodeString, 3),
4005 IMPORT_SFUNC(RtlCopyUnicodeString, 2),
4006 IMPORT_SFUNC(RtlUnicodeStringToAnsiString, 3),
4007 IMPORT_SFUNC(RtlAnsiStringToUnicodeString, 3),
4008 IMPORT_SFUNC(RtlInitAnsiString, 2),
4009 IMPORT_SFUNC_MAP(RtlInitString, RtlInitAnsiString, 2),
4010 IMPORT_SFUNC(RtlInitUnicodeString, 2),
4011 IMPORT_SFUNC(RtlFreeAnsiString, 1),
4012 IMPORT_SFUNC(RtlFreeUnicodeString, 1),
4013 IMPORT_SFUNC(RtlUnicodeStringToInteger, 3),
/* printf-style and libc imports are mapped to kernel equivalents. */
4014 IMPORT_CFUNC_MAP(sprintf, ksprintf, 0),
4015 IMPORT_CFUNC_MAP(vsprintf, kvsprintf, 0),
4016 IMPORT_CFUNC_MAP(_snprintf, ksnprintf, 0),
4017 IMPORT_CFUNC_MAP(_vsnprintf, kvsnprintf, 0),
4018 IMPORT_CFUNC(DbgPrint, 0),
4019 IMPORT_SFUNC(DbgBreakPoint, 0),
4020 IMPORT_SFUNC(KeBugCheckEx, 5),
4021 IMPORT_CFUNC(strncmp, 0),
4022 IMPORT_CFUNC(strcmp, 0),
4023 IMPORT_CFUNC_MAP(stricmp, strcasecmp, 0),
4024 IMPORT_CFUNC(strncpy, 0),
4025 IMPORT_CFUNC(strcpy, 0),
4026 IMPORT_CFUNC(strlen, 0),
4027 IMPORT_CFUNC_MAP(toupper, ntoskrnl_toupper, 0),
4028 IMPORT_CFUNC_MAP(tolower, ntoskrnl_tolower, 0),
4029 IMPORT_CFUNC_MAP(strstr, ntoskrnl_strstr, 0),
4030 IMPORT_CFUNC_MAP(strncat, ntoskrnl_strncat, 0),
4031 IMPORT_CFUNC_MAP(strchr, index, 0),
4032 IMPORT_CFUNC_MAP(strrchr, rindex, 0),
4033 IMPORT_CFUNC(_memcpy, 0),
4034 IMPORT_CFUNC_MAP(_memmove, ntoskrnl_memmove, 0),
4035 IMPORT_CFUNC_MAP(_memset, ntoskrnl_memset, 0),
4036 IMPORT_CFUNC_MAP(memchr, ntoskrnl_memchr, 0),
4037 IMPORT_SFUNC(IoAllocateDriverObjectExtension, 4),
4038 IMPORT_SFUNC(IoGetDriverObjectExtension, 2),
4039 IMPORT_FFUNC(IofCallDriver, 2),
4040 IMPORT_FFUNC(IofCompleteRequest, 2),
4041 IMPORT_SFUNC(IoAcquireCancelSpinLock, 1),
4042 IMPORT_SFUNC(IoReleaseCancelSpinLock, 1),
4043 IMPORT_SFUNC(IoCancelIrp, 1),
4044 IMPORT_SFUNC(IoConnectInterrupt, 11),
4045 IMPORT_SFUNC(IoDisconnectInterrupt, 1),
4046 IMPORT_SFUNC(IoCreateDevice, 7),
4047 IMPORT_SFUNC(IoDeleteDevice, 1),
4048 IMPORT_SFUNC(IoGetAttachedDevice, 1),
4049 IMPORT_SFUNC(IoAttachDeviceToDeviceStack, 2),
4050 IMPORT_SFUNC(IoDetachDevice, 1),
4051 IMPORT_SFUNC(IoBuildSynchronousFsdRequest, 7),
4052 IMPORT_SFUNC(IoBuildAsynchronousFsdRequest, 6),
4053 IMPORT_SFUNC(IoBuildDeviceIoControlRequest, 9),
4054 IMPORT_SFUNC(IoAllocateIrp, 2),
4055 IMPORT_SFUNC(IoReuseIrp, 2),
4056 IMPORT_SFUNC(IoMakeAssociatedIrp, 2),
4057 IMPORT_SFUNC(IoFreeIrp, 1),
4058 IMPORT_SFUNC(IoInitializeIrp, 3),
4059 IMPORT_SFUNC(KeAcquireInterruptSpinLock, 1),
4060 IMPORT_SFUNC(KeReleaseInterruptSpinLock, 2),
4061 IMPORT_SFUNC(KeSynchronizeExecution, 3),
4062 IMPORT_SFUNC(KeWaitForSingleObject, 5),
4063 IMPORT_SFUNC(KeWaitForMultipleObjects, 8),
/* 64-bit arithmetic helpers emitted by MS compilers. */
4064 IMPORT_SFUNC(_allmul, 4),
4065 IMPORT_SFUNC(_alldiv, 4),
4066 IMPORT_SFUNC(_allrem, 4),
4067 IMPORT_RFUNC(_allshr, 0),
4068 IMPORT_RFUNC(_allshl, 0),
4069 IMPORT_SFUNC(_aullmul, 4),
4070 IMPORT_SFUNC(_aulldiv, 4),
4071 IMPORT_SFUNC(_aullrem, 4),
4072 IMPORT_RFUNC(_aullshr, 0),
4073 IMPORT_RFUNC(_aullshl, 0),
4074 IMPORT_CFUNC(atoi, 0),
4075 IMPORT_CFUNC(atol, 0),
4076 IMPORT_CFUNC(rand, 0),
4077 IMPORT_CFUNC(srand, 0),
4078 IMPORT_SFUNC(WRITE_REGISTER_USHORT, 2),
4079 IMPORT_SFUNC(READ_REGISTER_USHORT, 1),
4080 IMPORT_SFUNC(WRITE_REGISTER_ULONG, 2),
4081 IMPORT_SFUNC(READ_REGISTER_ULONG, 1),
4082 IMPORT_SFUNC(READ_REGISTER_UCHAR, 1),
4083 IMPORT_SFUNC(WRITE_REGISTER_UCHAR, 2),
4084 IMPORT_SFUNC(ExInitializePagedLookasideList, 7),
4085 IMPORT_SFUNC(ExDeletePagedLookasideList, 1),
4086 IMPORT_SFUNC(ExInitializeNPagedLookasideList, 7),
4087 IMPORT_SFUNC(ExDeleteNPagedLookasideList, 1),
4088 IMPORT_FFUNC(InterlockedPopEntrySList, 1),
4089 IMPORT_FFUNC(InitializeSListHead, 1),
4090 IMPORT_FFUNC(InterlockedPushEntrySList, 2),
4091 IMPORT_SFUNC(ExQueryDepthSList, 1),
4092 IMPORT_FFUNC_MAP(ExpInterlockedPopEntrySList,
4093 InterlockedPopEntrySList, 1),
4094 IMPORT_FFUNC_MAP(ExpInterlockedPushEntrySList,
4095 InterlockedPushEntrySList, 2),
4096 IMPORT_FFUNC(ExInterlockedPopEntrySList, 2),
4097 IMPORT_FFUNC(ExInterlockedPushEntrySList, 3),
4098 IMPORT_SFUNC(ExAllocatePoolWithTag, 3),
4099 IMPORT_SFUNC(ExFreePoolWithTag, 2),
4100 IMPORT_SFUNC(ExFreePool, 1),
4102 * For AMD64, we can get away with just mapping
4103 * KeAcquireSpinLockRaiseToDpc() directly to KfAcquireSpinLock()
4104 * because the calling conventions end up being the same.
4106 IMPORT_SFUNC(KeAcquireSpinLockAtDpcLevel, 1),
4107 IMPORT_SFUNC(KeReleaseSpinLockFromDpcLevel, 1),
4108 IMPORT_SFUNC_MAP(KeAcquireSpinLockRaiseToDpc, KfAcquireSpinLock, 1),
4109 IMPORT_SFUNC_MAP(KeReleaseSpinLock, KfReleaseSpinLock, 1),
4110 IMPORT_FFUNC(InterlockedIncrement, 1),
4111 IMPORT_FFUNC(InterlockedDecrement, 1),
4112 IMPORT_FFUNC(InterlockedExchange, 2),
4113 IMPORT_FFUNC(ExInterlockedAddLargeStatistic, 2),
4114 IMPORT_SFUNC(IoAllocateMdl, 5),
4115 IMPORT_SFUNC(IoFreeMdl, 1),
4116 IMPORT_SFUNC(MmAllocateContiguousMemory, 2 + 1),
4117 IMPORT_SFUNC(MmAllocateContiguousMemorySpecifyCache, 5 + 3),
4118 IMPORT_SFUNC(MmFreeContiguousMemory, 1),
4119 IMPORT_SFUNC(MmFreeContiguousMemorySpecifyCache, 3),
4120 IMPORT_SFUNC(MmSizeOfMdl, 1),
4121 IMPORT_SFUNC(MmMapLockedPages, 2),
4122 IMPORT_SFUNC(MmMapLockedPagesSpecifyCache, 6),
4123 IMPORT_SFUNC(MmUnmapLockedPages, 2),
4124 IMPORT_SFUNC(MmBuildMdlForNonPagedPool, 1),
4125 IMPORT_SFUNC(MmGetPhysicalAddress, 1),
4126 IMPORT_SFUNC(MmGetSystemRoutineAddress, 1),
4127 IMPORT_SFUNC(MmIsAddressValid, 1),
4128 IMPORT_SFUNC(MmMapIoSpace, 3 + 1),
4129 IMPORT_SFUNC(MmUnmapIoSpace, 2),
4130 IMPORT_SFUNC(KeInitializeSpinLock, 1),
4131 IMPORT_SFUNC(IoIsWdmVersionAvailable, 2),
4132 IMPORT_SFUNC(IoOpenDeviceRegistryKey, 4),
4133 IMPORT_SFUNC(IoGetDeviceObjectPointer, 4),
4134 IMPORT_SFUNC(IoGetDeviceProperty, 5),
4135 IMPORT_SFUNC(IoAllocateWorkItem, 1),
4136 IMPORT_SFUNC(IoFreeWorkItem, 1),
4137 IMPORT_SFUNC(IoQueueWorkItem, 4),
4138 IMPORT_SFUNC(ExQueueWorkItem, 2),
4139 IMPORT_SFUNC(ntoskrnl_workitem, 2),
4140 IMPORT_SFUNC(KeInitializeMutex, 2),
4141 IMPORT_SFUNC(KeReleaseMutex, 2),
4142 IMPORT_SFUNC(KeReadStateMutex, 1),
4143 IMPORT_SFUNC(KeInitializeEvent, 3),
4144 IMPORT_SFUNC(KeSetEvent, 3),
4145 IMPORT_SFUNC(KeResetEvent, 1),
4146 IMPORT_SFUNC(KeClearEvent, 1),
4147 IMPORT_SFUNC(KeReadStateEvent, 1),
4148 IMPORT_SFUNC(KeInitializeTimer, 1),
4149 IMPORT_SFUNC(KeInitializeTimerEx, 2),
4150 IMPORT_SFUNC(KeSetTimer, 3),
4151 IMPORT_SFUNC(KeSetTimerEx, 4),
4152 IMPORT_SFUNC(KeCancelTimer, 1),
4153 IMPORT_SFUNC(KeReadStateTimer, 1),
4154 IMPORT_SFUNC(KeInitializeDpc, 3),
4155 IMPORT_SFUNC(KeInsertQueueDpc, 3),
4156 IMPORT_SFUNC(KeRemoveQueueDpc, 1),
4157 IMPORT_SFUNC(KeSetImportanceDpc, 2),
4158 IMPORT_SFUNC(KeSetTargetProcessorDpc, 2),
4159 IMPORT_SFUNC(KeFlushQueuedDpcs, 0),
4160 IMPORT_SFUNC(KeGetCurrentProcessorNumber, 1),
4161 IMPORT_SFUNC(ObReferenceObjectByHandle, 6),
4162 IMPORT_FFUNC(ObfDereferenceObject, 1),
4163 IMPORT_SFUNC(ZwClose, 1),
4164 IMPORT_SFUNC(PsCreateSystemThread, 7),
4165 IMPORT_SFUNC(PsTerminateSystemThread, 1),
4166 IMPORT_SFUNC(IoWMIRegistrationControl, 2),
4167 IMPORT_SFUNC(WmiQueryTraceInformation, 5),
4168 IMPORT_CFUNC(WmiTraceMessage, 0),
4169 IMPORT_SFUNC(KeQuerySystemTime, 1),
4170 IMPORT_CFUNC(KeTickCount, 0),
4171 IMPORT_SFUNC(KeDelayExecutionThread, 3),
4172 IMPORT_SFUNC(KeQueryInterruptTime, 0),
4173 IMPORT_SFUNC(KeGetCurrentThread, 0),
4174 IMPORT_SFUNC(KeSetPriorityThread, 2),
4177 * This last entry is a catch-all for any function we haven't
4178 * implemented yet. The PE import list patching routine will
4179 * use it for any function that doesn't have an explicit match
4183 { NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL },
/* End-of-table sentinel. */
4187 { NULL, NULL, NULL }