3 * Bill Paul <wpaul@windriver.com>. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by Bill Paul.
16 * 4. Neither the name of the author nor the names of any co-contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30 * THE POSSIBILITY OF SUCH DAMAGE.
32 * $FreeBSD: src/sys/compat/ndis/subr_ntoskrnl.c,v 1.117 2012/11/17 01:51:26 svnexp Exp $
35 #include <sys/ctype.h>
36 #include <sys/unistd.h>
37 #include <sys/param.h>
38 #include <sys/types.h>
39 #include <sys/errno.h>
40 #include <sys/systm.h>
41 #include <sys/malloc.h>
43 #include <sys/thread2.h>
44 #include <sys/mutex.h>
45 #include <sys/mutex2.h>
47 #include <sys/callout.h>
48 #include <sys/kernel.h>
50 #include <sys/condvar.h>
51 #include <sys/kthread.h>
52 #include <sys/module.h>
53 #include <sys/sched.h>
54 #include <sys/sysctl.h>
56 #include <machine/atomic.h>
60 #include <sys/objcache.h>
63 #include <vm/vm_param.h>
65 #include <vm/vm_kern.h>
66 #include <vm/vm_map.h>
67 #include <vm/vm_extern.h>
69 #include <emulation/ndis/pe_var.h>
70 #include <emulation/ndis/cfg_var.h>
71 #include <emulation/ndis/resource_var.h>
72 #include <emulation/ndis/ntoskrnl_var.h>
73 #include <emulation/ndis/hal_var.h>
74 #include <emulation/ndis/ndis_var.h>
76 #include <machine/stdarg.h>
78 #ifdef NTOSKRNL_DEBUG_TIMERS
79 static int sysctl_show_timers(SYSCTL_HANDLER_ARGS);
81 SYSCTL_PROC(_debug, OID_AUTO, ntoskrnl_timers, CTLTYPE_INT | CTLFLAG_RW,
82 NULL, 0, sysctl_show_timers, "I",
83 "Show ntoskrnl timer stats");
97 typedef struct kdpc_queue kdpc_queue;
101 struct thread *we_td;
104 typedef struct wb_ext wb_ext;
106 #define NTOSKRNL_TIMEOUTS 256
107 #ifdef NTOSKRNL_DEBUG_TIMERS
108 static uint64_t ntoskrnl_timer_fires;
109 static uint64_t ntoskrnl_timer_sets;
110 static uint64_t ntoskrnl_timer_reloads;
111 static uint64_t ntoskrnl_timer_cancels;
114 struct callout_entry {
115 struct callout ce_callout;
119 typedef struct callout_entry callout_entry;
121 static struct list_entry ntoskrnl_calllist;
122 static struct mtx ntoskrnl_calllock;
123 struct kuser_shared_data kuser_shared_data;
125 static struct list_entry ntoskrnl_intlist;
126 static kspin_lock ntoskrnl_intlock;
128 static uint8_t RtlEqualUnicodeString(unicode_string *,
129 unicode_string *, uint8_t);
130 static void RtlCopyString(ansi_string *, const ansi_string *);
131 static void RtlCopyUnicodeString(unicode_string *,
133 static irp *IoBuildSynchronousFsdRequest(uint32_t, device_object *,
134 void *, uint32_t, uint64_t *, nt_kevent *, io_status_block *);
135 static irp *IoBuildAsynchronousFsdRequest(uint32_t,
136 device_object *, void *, uint32_t, uint64_t *, io_status_block *);
137 static irp *IoBuildDeviceIoControlRequest(uint32_t,
138 device_object *, void *, uint32_t, void *, uint32_t,
139 uint8_t, nt_kevent *, io_status_block *);
140 static irp *IoAllocateIrp(uint8_t, uint8_t);
141 static void IoReuseIrp(irp *, uint32_t);
142 static void IoFreeIrp(irp *);
143 static void IoInitializeIrp(irp *, uint16_t, uint8_t);
144 static irp *IoMakeAssociatedIrp(irp *, uint8_t);
145 static uint32_t KeWaitForMultipleObjects(uint32_t,
146 nt_dispatch_header **, uint32_t, uint32_t, uint32_t, uint8_t,
147 int64_t *, wait_block *);
148 static void ntoskrnl_waittest(nt_dispatch_header *, uint32_t);
149 static void ntoskrnl_satisfy_wait(nt_dispatch_header *, struct thread *);
150 static void ntoskrnl_satisfy_multiple_waits(wait_block *);
151 static int ntoskrnl_is_signalled(nt_dispatch_header *, struct thread *);
152 static void ntoskrnl_insert_timer(ktimer *, int);
153 static void ntoskrnl_remove_timer(ktimer *);
154 #ifdef NTOSKRNL_DEBUG_TIMERS
155 static void ntoskrnl_show_timers(void);
157 static void ntoskrnl_timercall(void *);
158 static void ntoskrnl_dpc_thread(void *);
159 static void ntoskrnl_destroy_dpc_threads(void);
160 static void ntoskrnl_destroy_workitem_threads(void);
161 static void ntoskrnl_workitem_thread(void *);
162 static void ntoskrnl_workitem(device_object *, void *);
163 static void ntoskrnl_unicode_to_ascii(uint16_t *, char *, int);
164 static void ntoskrnl_ascii_to_unicode(char *, uint16_t *, int);
165 static uint8_t ntoskrnl_insert_dpc(list_entry *, kdpc *);
166 static void WRITE_REGISTER_USHORT(uint16_t *, uint16_t);
167 static uint16_t READ_REGISTER_USHORT(uint16_t *);
168 static void WRITE_REGISTER_ULONG(uint32_t *, uint32_t);
169 static uint32_t READ_REGISTER_ULONG(uint32_t *);
170 static void WRITE_REGISTER_UCHAR(uint8_t *, uint8_t);
171 static uint8_t READ_REGISTER_UCHAR(uint8_t *);
172 static int64_t _allmul(int64_t, int64_t);
173 static int64_t _alldiv(int64_t, int64_t);
174 static int64_t _allrem(int64_t, int64_t);
175 static int64_t _allshr(int64_t, uint8_t);
176 static int64_t _allshl(int64_t, uint8_t);
177 static uint64_t _aullmul(uint64_t, uint64_t);
178 static uint64_t _aulldiv(uint64_t, uint64_t);
179 static uint64_t _aullrem(uint64_t, uint64_t);
180 static uint64_t _aullshr(uint64_t, uint8_t);
181 static uint64_t _aullshl(uint64_t, uint8_t);
182 static slist_entry *ntoskrnl_pushsl(slist_header *, slist_entry *);
183 static void InitializeSListHead(slist_header *);
184 static slist_entry *ntoskrnl_popsl(slist_header *);
185 static void ExFreePoolWithTag(void *, uint32_t);
186 static void ExInitializePagedLookasideList(paged_lookaside_list *,
187 lookaside_alloc_func *, lookaside_free_func *,
188 uint32_t, size_t, uint32_t, uint16_t);
189 static void ExDeletePagedLookasideList(paged_lookaside_list *);
190 static void ExInitializeNPagedLookasideList(npaged_lookaside_list *,
191 lookaside_alloc_func *, lookaside_free_func *,
192 uint32_t, size_t, uint32_t, uint16_t);
193 static void ExDeleteNPagedLookasideList(npaged_lookaside_list *);
195 *ExInterlockedPushEntrySList(slist_header *,
196 slist_entry *, kspin_lock *);
198 *ExInterlockedPopEntrySList(slist_header *, kspin_lock *);
199 static uint32_t InterlockedIncrement(volatile uint32_t *);
200 static uint32_t InterlockedDecrement(volatile uint32_t *);
201 static void ExInterlockedAddLargeStatistic(uint64_t *, uint32_t);
202 static void *MmAllocateContiguousMemory(uint32_t, uint64_t);
203 static void *MmAllocateContiguousMemorySpecifyCache(uint32_t,
204 uint64_t, uint64_t, uint64_t, enum nt_caching_type);
205 static void MmFreeContiguousMemory(void *);
206 static void MmFreeContiguousMemorySpecifyCache(void *, uint32_t,
207 enum nt_caching_type);
208 static uint32_t MmSizeOfMdl(void *, size_t);
209 static void *MmMapLockedPages(mdl *, uint8_t);
210 static void *MmMapLockedPagesSpecifyCache(mdl *,
211 uint8_t, uint32_t, void *, uint32_t, uint32_t);
212 static void MmUnmapLockedPages(void *, mdl *);
213 static device_t ntoskrnl_finddev(device_t, uint64_t, struct resource **);
214 static void RtlZeroMemory(void *, size_t);
215 static void RtlSecureZeroMemory(void *, size_t);
216 static void RtlFillMemory(void *, size_t, uint8_t);
217 static void RtlMoveMemory(void *, const void *, size_t);
218 static ndis_status RtlCharToInteger(const char *, uint32_t, uint32_t *);
219 static void RtlCopyMemory(void *, const void *, size_t);
220 static size_t RtlCompareMemory(const void *, const void *, size_t);
221 static ndis_status RtlUnicodeStringToInteger(unicode_string *,
222 uint32_t, uint32_t *);
223 static int atoi (const char *);
224 static long atol (const char *);
225 static int rand(void);
226 static void srand(unsigned int);
227 static void KeQuerySystemTime(uint64_t *);
228 static uint32_t KeTickCount(void);
229 static uint8_t IoIsWdmVersionAvailable(uint8_t, uint8_t);
230 static int32_t IoOpenDeviceRegistryKey(struct device_object *, uint32_t,
232 static void ntoskrnl_thrfunc(void *);
233 static ndis_status PsCreateSystemThread(ndis_handle *,
234 uint32_t, void *, ndis_handle, void *, void *, void *);
235 static ndis_status PsTerminateSystemThread(ndis_status);
236 static ndis_status IoGetDeviceObjectPointer(unicode_string *,
237 uint32_t, void *, device_object *);
238 static ndis_status IoGetDeviceProperty(device_object *, uint32_t,
239 uint32_t, void *, uint32_t *);
240 static void KeInitializeMutex(kmutant *, uint32_t);
241 static uint32_t KeReleaseMutex(kmutant *, uint8_t);
242 static uint32_t KeReadStateMutex(kmutant *);
243 static ndis_status ObReferenceObjectByHandle(ndis_handle,
244 uint32_t, void *, uint8_t, void **, void **);
245 static void ObfDereferenceObject(void *);
246 static uint32_t ZwClose(ndis_handle);
247 static uint32_t WmiQueryTraceInformation(uint32_t, void *, uint32_t,
249 static uint32_t WmiTraceMessage(uint64_t, uint32_t, void *, uint16_t, ...);
250 static uint32_t IoWMIRegistrationControl(device_object *, uint32_t);
251 static void *ntoskrnl_memset(void *, int, size_t);
252 static void *ntoskrnl_memmove(void *, void *, size_t);
253 static void *ntoskrnl_memchr(void *, unsigned char, size_t);
254 static char *ntoskrnl_strstr(char *, char *);
255 static char *ntoskrnl_strncat(char *, char *, size_t);
256 static int ntoskrnl_toupper(int);
257 static int ntoskrnl_tolower(int);
258 static funcptr ntoskrnl_findwrap(funcptr);
259 static uint32_t DbgPrint(char *, ...) __printflike(1, 2);
260 static void DbgBreakPoint(void);
261 static void KeBugCheckEx(uint32_t, u_long, u_long, u_long, u_long);
262 static int32_t KeDelayExecutionThread(uint8_t, uint8_t, int64_t *);
263 static int32_t KeSetPriorityThread(struct thread *, int32_t);
264 static void dummy(void);
266 static struct lock ntoskrnl_dispatchlock;
267 static struct mtx ntoskrnl_interlock;
268 static kspin_lock ntoskrnl_cancellock;
269 static int ntoskrnl_kth = 0;
270 static struct nt_objref_head ntoskrnl_reflist;
271 static struct objcache *mdl_cache;
272 static struct objcache *iw_cache;
273 static struct kdpc_queue *kq_queues;
274 static struct kdpc_queue *wq_queues;
275 static int wq_idx = 0;
277 static struct objcache_malloc_args mdl_alloc_args = {
278 MDL_ZONE_SIZE, M_DEVBUF
280 static struct objcache_malloc_args iw_alloc_args = {
281 sizeof(io_workitem), M_DEVBUF
285 ntoskrnl_libinit(void)
287 image_patch_table *patch;
294 lockinit(&ntoskrnl_dispatchlock, MTX_NDIS_LOCK, 0, LK_CANRECURSE);
295 mtx_init(&ntoskrnl_interlock, "ndis1");
296 KeInitializeSpinLock(&ntoskrnl_cancellock);
297 KeInitializeSpinLock(&ntoskrnl_intlock);
298 TAILQ_INIT(&ntoskrnl_reflist);
300 InitializeListHead(&ntoskrnl_calllist);
301 InitializeListHead(&ntoskrnl_intlist);
302 mtx_init(&ntoskrnl_calllock, "ndis2");
304 kq_queues = ExAllocatePoolWithTag(NonPagedPool,
305 #ifdef NTOSKRNL_MULTIPLE_DPCS
306 sizeof(kdpc_queue) * ncpus, 0);
308 sizeof(kdpc_queue), 0);
311 if (kq_queues == NULL)
314 wq_queues = ExAllocatePoolWithTag(NonPagedPool,
315 sizeof(kdpc_queue) * WORKITEM_THREADS, 0);
317 if (wq_queues == NULL)
320 #ifdef NTOSKRNL_MULTIPLE_DPCS
321 bzero((char *)kq_queues, sizeof(kdpc_queue) * ncpus);
323 bzero((char *)kq_queues, sizeof(kdpc_queue));
325 bzero((char *)wq_queues, sizeof(kdpc_queue) * WORKITEM_THREADS);
328 * Launch the DPC threads.
331 #ifdef NTOSKRNL_MULTIPLE_DPCS
332 for (i = 0; i < ncpus; i++) {
334 for (i = 0; i < 1; i++) {
338 error = kthread_create_cpu(ntoskrnl_dpc_thread, kq, &p, i,
341 panic("failed to launch DPC thread");
345 * Launch the workitem threads.
348 for (i = 0; i < WORKITEM_THREADS; i++) {
350 error = kthread_create(ntoskrnl_workitem_thread, kq, &p,
351 "Win Workitem %d", i);
353 panic("failed to launch workitem thread");
356 patch = ntoskrnl_functbl;
357 while (patch->ipt_func != NULL) {
358 windrv_wrap((funcptr)patch->ipt_func,
359 (funcptr *)&patch->ipt_wrap,
360 patch->ipt_argcnt, patch->ipt_ftype);
364 for (i = 0; i < NTOSKRNL_TIMEOUTS; i++) {
365 e = ExAllocatePoolWithTag(NonPagedPool,
366 sizeof(callout_entry), 0);
368 panic("failed to allocate timeouts");
369 mtx_spinlock(&ntoskrnl_calllock);
370 InsertHeadList((&ntoskrnl_calllist), (&e->ce_list));
371 mtx_spinunlock(&ntoskrnl_calllock);
375 * MDLs are supposed to be variable size (they describe
376 * buffers containing some number of pages, but we don't
377 * know ahead of time how many pages that will be). But
378 * always allocating them off the heap is very slow. As
379 * a compromise, we create an MDL UMA zone big enough to
380 * handle any buffer requiring up to 16 pages, and we
381 * use those for any MDLs for buffers of 16 pages or less
382 * in size. For buffers larger than that (which we assume
383 * will be few and far between, we allocate the MDLs off
386 * CHANGED TO USING objcache(9) IN DRAGONFLY
389 mdl_cache = objcache_create("Windows MDL", 0, 0,
390 NULL, NULL, NULL, objcache_malloc_alloc, objcache_malloc_free,
393 iw_cache = objcache_create("Windows WorkItem", 0, 0,
394 NULL, NULL, NULL, objcache_malloc_alloc, objcache_malloc_free,
401 ntoskrnl_libfini(void)
403 image_patch_table *patch;
407 patch = ntoskrnl_functbl;
408 while (patch->ipt_func != NULL) {
409 windrv_unwrap(patch->ipt_wrap);
413 /* Stop the workitem queues. */
414 ntoskrnl_destroy_workitem_threads();
415 /* Stop the DPC queues. */
416 ntoskrnl_destroy_dpc_threads();
418 ExFreePool(kq_queues);
419 ExFreePool(wq_queues);
421 objcache_destroy(mdl_cache);
422 objcache_destroy(iw_cache);
424 mtx_spinlock(&ntoskrnl_calllock);
425 while(!IsListEmpty(&ntoskrnl_calllist)) {
426 l = RemoveHeadList(&ntoskrnl_calllist);
427 e = CONTAINING_RECORD(l, callout_entry, ce_list);
428 mtx_spinunlock(&ntoskrnl_calllock);
430 mtx_spinlock(&ntoskrnl_calllock);
432 mtx_spinunlock(&ntoskrnl_calllock);
434 lockuninit(&ntoskrnl_dispatchlock);
435 mtx_uninit(&ntoskrnl_interlock);
436 mtx_uninit(&ntoskrnl_calllock);
/*
 * We need to be able to reference this externally from the wrapper;
 * GCC only generates a local implementation of memset.
 * Thin wrapper: fills size bytes at buf with ch and returns buf.
 */
static void *
ntoskrnl_memset(void *buf, int ch, size_t size)
{
	return (memset(buf, ch, size));
}
/*
 * memmove() replacement exported to Windows drivers.  bcopy() handles
 * overlapping regions; return dst to match the C library contract.
 */
static void *
ntoskrnl_memmove(void *dst, void *src, size_t size)
{
	bcopy(src, dst, size);
	return (dst);
}
/*
 * memchr() replacement: return a pointer to the first occurrence of
 * byte ch within the first len bytes of buf, or NULL if not found
 * (including when len == 0).
 */
static void *
ntoskrnl_memchr(void *buf, unsigned char ch, size_t len)
{
	if (len != 0) {
		unsigned char *p = buf;

		do {
			if (*p++ == ch)
				return (p - 1);
		} while (--len != 0);
	}
	return (NULL);
}
/*
 * strstr() replacement (classic libc algorithm): locate the first
 * occurrence of the substring find in s.  An empty find matches at s;
 * returns NULL when find does not occur.
 */
static char *
ntoskrnl_strstr(char *s, char *find)
{
	char c, sc;
	size_t len;

	if ((c = *find++) != 0) {
		len = strlen(find);
		do {
			do {
				if ((sc = *s++) == 0)
					return (NULL);
			} while (sc != c);
		} while (strncmp(s, find, len) != 0);
		s--;
	}
	return (s);
}
/* Taken from libc */
/*
 * strncat() replacement: append at most n characters from src to the
 * NUL-terminated string dst, always NUL-terminating the result.
 * Returns dst.
 */
static char *
ntoskrnl_strncat(char *dst, char *src, size_t n)
{
	if (n != 0) {
		char *d = dst;
		const char *s = src;

		while (*d != 0)
			d++;
		do {
			if ((*d = *s++) == 0)
				break;
			d++;
		} while (--n != 0);
		*d = 0;
	}

	return (dst);
}
/* toupper() shim exported to Windows drivers. */
static int
ntoskrnl_toupper(int c)
{
	return (toupper(c));
}
/* tolower() shim exported to Windows drivers. */
static int
ntoskrnl_tolower(int c)
{
	return (tolower(c));
}
524 RtlEqualUnicodeString(unicode_string *str1, unicode_string *str2,
525 uint8_t caseinsensitive)
529 if (str1->us_len != str2->us_len)
532 for (i = 0; i < str1->us_len; i++) {
533 if (caseinsensitive == TRUE) {
534 if (toupper((char)(str1->us_buf[i] & 0xFF)) !=
535 toupper((char)(str2->us_buf[i] & 0xFF)))
538 if (str1->us_buf[i] != str2->us_buf[i])
547 RtlCopyString(ansi_string *dst, const ansi_string *src)
549 if (src != NULL && src->as_buf != NULL && dst->as_buf != NULL) {
550 dst->as_len = min(src->as_len, dst->as_maxlen);
551 memcpy(dst->as_buf, src->as_buf, dst->as_len);
552 if (dst->as_len < dst->as_maxlen)
553 dst->as_buf[dst->as_len] = 0;
559 RtlCopyUnicodeString(unicode_string *dest, unicode_string *src)
562 if (dest->us_maxlen >= src->us_len)
563 dest->us_len = src->us_len;
565 dest->us_len = dest->us_maxlen;
566 memcpy(dest->us_buf, src->us_buf, dest->us_len);
/*
 * Widen len ASCII characters into a UTF-16 buffer by zero-extending
 * each byte.  len counts characters; the caller guarantees unicode
 * has room for len uint16_t entries.
 */
static void
ntoskrnl_ascii_to_unicode(char *ascii, uint16_t *unicode, int len)
{
	int i;
	uint16_t *ustr;

	ustr = unicode;
	for (i = 0; i < len; i++) {
		*ustr = (uint16_t)ascii[i];
		ustr++;
	}
}
/*
 * Narrow a UTF-16 buffer into ASCII by truncating each code unit to
 * its low byte.  len is the unicode buffer size in BYTES, so len / 2
 * characters are converted.
 */
static void
ntoskrnl_unicode_to_ascii(uint16_t *unicode, char *ascii, int len)
{
	int i;
	char *astr;

	astr = ascii;
	for (i = 0; i < len / 2; i++) {
		*astr = (uint8_t)unicode[i];
		astr++;
	}
}
596 RtlUnicodeStringToAnsiString(ansi_string *dest, unicode_string *src, uint8_t allocate)
598 if (dest == NULL || src == NULL)
599 return (STATUS_INVALID_PARAMETER);
601 dest->as_len = src->us_len / 2;
602 if (dest->as_maxlen < dest->as_len)
603 dest->as_len = dest->as_maxlen;
605 if (allocate == TRUE) {
606 dest->as_buf = ExAllocatePoolWithTag(NonPagedPool,
607 (src->us_len / 2) + 1, 0);
608 if (dest->as_buf == NULL)
609 return (STATUS_INSUFFICIENT_RESOURCES);
610 dest->as_len = dest->as_maxlen = src->us_len / 2;
612 dest->as_len = src->us_len / 2; /* XXX */
613 if (dest->as_maxlen < dest->as_len)
614 dest->as_len = dest->as_maxlen;
617 ntoskrnl_unicode_to_ascii(src->us_buf, dest->as_buf,
620 return (STATUS_SUCCESS);
624 RtlAnsiStringToUnicodeString(unicode_string *dest, ansi_string *src,
627 if (dest == NULL || src == NULL)
628 return (STATUS_INVALID_PARAMETER);
630 if (allocate == TRUE) {
631 dest->us_buf = ExAllocatePoolWithTag(NonPagedPool,
633 if (dest->us_buf == NULL)
634 return (STATUS_INSUFFICIENT_RESOURCES);
635 dest->us_len = dest->us_maxlen = strlen(src->as_buf) * 2;
637 dest->us_len = src->as_len * 2; /* XXX */
638 if (dest->us_maxlen < dest->us_len)
639 dest->us_len = dest->us_maxlen;
642 ntoskrnl_ascii_to_unicode(src->as_buf, dest->us_buf,
645 return (STATUS_SUCCESS);
649 ExAllocatePoolWithTag(uint32_t pooltype, size_t len, uint32_t tag)
653 buf = kmalloc(len, M_DEVBUF, M_NOWAIT|M_ZERO);
661 ExFreePoolWithTag(void *buf, uint32_t tag)
667 ExFreePool(void *buf)
669 kfree(buf, M_DEVBUF);
673 IoAllocateDriverObjectExtension(driver_object *drv, void *clid,
674 uint32_t extlen, void **ext)
676 custom_extension *ce;
678 ce = ExAllocatePoolWithTag(NonPagedPool, sizeof(custom_extension)
682 return (STATUS_INSUFFICIENT_RESOURCES);
685 InsertTailList((&drv->dro_driverext->dre_usrext), (&ce->ce_list));
687 *ext = (void *)(ce + 1);
689 return (STATUS_SUCCESS);
693 IoGetDriverObjectExtension(driver_object *drv, void *clid)
696 custom_extension *ce;
699 * Sanity check. Our dummy bus drivers don't have
700 * any driver extentions.
703 if (drv->dro_driverext == NULL)
706 e = drv->dro_driverext->dre_usrext.nle_flink;
707 while (e != &drv->dro_driverext->dre_usrext) {
708 ce = (custom_extension *)e;
709 if (ce->ce_clid == clid)
710 return ((void *)(ce + 1));
719 IoCreateDevice(driver_object *drv, uint32_t devextlen, unicode_string *devname,
720 uint32_t devtype, uint32_t devchars, uint8_t exclusive,
721 device_object **newdev)
725 dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device_object), 0);
727 return (STATUS_INSUFFICIENT_RESOURCES);
729 dev->do_type = devtype;
730 dev->do_drvobj = drv;
731 dev->do_currirp = NULL;
735 dev->do_devext = ExAllocatePoolWithTag(NonPagedPool,
738 if (dev->do_devext == NULL) {
740 return (STATUS_INSUFFICIENT_RESOURCES);
743 bzero(dev->do_devext, devextlen);
745 dev->do_devext = NULL;
747 dev->do_size = sizeof(device_object) + devextlen;
749 dev->do_attacheddev = NULL;
750 dev->do_nextdev = NULL;
751 dev->do_devtype = devtype;
752 dev->do_stacksize = 1;
753 dev->do_alignreq = 1;
754 dev->do_characteristics = devchars;
755 dev->do_iotimer = NULL;
756 KeInitializeEvent(&dev->do_devlock, EVENT_TYPE_SYNC, TRUE);
759 * Vpd is used for disk/tape devices,
760 * but we don't support those. (Yet.)
764 dev->do_devobj_ext = ExAllocatePoolWithTag(NonPagedPool,
765 sizeof(devobj_extension), 0);
767 if (dev->do_devobj_ext == NULL) {
768 if (dev->do_devext != NULL)
769 ExFreePool(dev->do_devext);
771 return (STATUS_INSUFFICIENT_RESOURCES);
774 dev->do_devobj_ext->dve_type = 0;
775 dev->do_devobj_ext->dve_size = sizeof(devobj_extension);
776 dev->do_devobj_ext->dve_devobj = dev;
779 * Attach this device to the driver object's list
780 * of devices. Note: this is not the same as attaching
781 * the device to the device stack. The driver's AddDevice
782 * routine must explicitly call IoAddDeviceToDeviceStack()
786 if (drv->dro_devobj == NULL) {
787 drv->dro_devobj = dev;
788 dev->do_nextdev = NULL;
790 dev->do_nextdev = drv->dro_devobj;
791 drv->dro_devobj = dev;
796 return (STATUS_SUCCESS);
800 IoDeleteDevice(device_object *dev)
807 if (dev->do_devobj_ext != NULL)
808 ExFreePool(dev->do_devobj_ext);
810 if (dev->do_devext != NULL)
811 ExFreePool(dev->do_devext);
813 /* Unlink the device from the driver's device list. */
815 prev = dev->do_drvobj->dro_devobj;
817 dev->do_drvobj->dro_devobj = dev->do_nextdev;
819 while (prev->do_nextdev != dev)
820 prev = prev->do_nextdev;
821 prev->do_nextdev = dev->do_nextdev;
828 IoGetAttachedDevice(device_object *dev)
837 while (d->do_attacheddev != NULL)
838 d = d->do_attacheddev;
844 IoBuildSynchronousFsdRequest(uint32_t func, device_object *dobj, void *buf,
845 uint32_t len, uint64_t *off, nt_kevent *event, io_status_block *status)
849 ip = IoBuildAsynchronousFsdRequest(func, dobj, buf, len, off, status);
852 ip->irp_usrevent = event;
858 IoBuildAsynchronousFsdRequest(uint32_t func, device_object *dobj, void *buf,
859 uint32_t len, uint64_t *off, io_status_block *status)
862 io_stack_location *sl;
864 ip = IoAllocateIrp(dobj->do_stacksize, TRUE);
868 ip->irp_usriostat = status;
869 ip->irp_tail.irp_overlay.irp_thread = NULL;
871 sl = IoGetNextIrpStackLocation(ip);
872 sl->isl_major = func;
876 sl->isl_devobj = dobj;
877 sl->isl_fileobj = NULL;
878 sl->isl_completionfunc = NULL;
880 ip->irp_userbuf = buf;
882 if (dobj->do_flags & DO_BUFFERED_IO) {
883 ip->irp_assoc.irp_sysbuf =
884 ExAllocatePoolWithTag(NonPagedPool, len, 0);
885 if (ip->irp_assoc.irp_sysbuf == NULL) {
889 bcopy(buf, ip->irp_assoc.irp_sysbuf, len);
892 if (dobj->do_flags & DO_DIRECT_IO) {
893 ip->irp_mdl = IoAllocateMdl(buf, len, FALSE, FALSE, ip);
894 if (ip->irp_mdl == NULL) {
895 if (ip->irp_assoc.irp_sysbuf != NULL)
896 ExFreePool(ip->irp_assoc.irp_sysbuf);
900 ip->irp_userbuf = NULL;
901 ip->irp_assoc.irp_sysbuf = NULL;
904 if (func == IRP_MJ_READ) {
905 sl->isl_parameters.isl_read.isl_len = len;
907 sl->isl_parameters.isl_read.isl_byteoff = *off;
909 sl->isl_parameters.isl_read.isl_byteoff = 0;
912 if (func == IRP_MJ_WRITE) {
913 sl->isl_parameters.isl_write.isl_len = len;
915 sl->isl_parameters.isl_write.isl_byteoff = *off;
917 sl->isl_parameters.isl_write.isl_byteoff = 0;
924 IoBuildDeviceIoControlRequest(uint32_t iocode, device_object *dobj, void *ibuf,
925 uint32_t ilen, void *obuf, uint32_t olen, uint8_t isinternal,
926 nt_kevent *event, io_status_block *status)
929 io_stack_location *sl;
932 ip = IoAllocateIrp(dobj->do_stacksize, TRUE);
935 ip->irp_usrevent = event;
936 ip->irp_usriostat = status;
937 ip->irp_tail.irp_overlay.irp_thread = NULL;
939 sl = IoGetNextIrpStackLocation(ip);
940 sl->isl_major = isinternal == TRUE ?
941 IRP_MJ_INTERNAL_DEVICE_CONTROL : IRP_MJ_DEVICE_CONTROL;
945 sl->isl_devobj = dobj;
946 sl->isl_fileobj = NULL;
947 sl->isl_completionfunc = NULL;
948 sl->isl_parameters.isl_ioctl.isl_iocode = iocode;
949 sl->isl_parameters.isl_ioctl.isl_ibuflen = ilen;
950 sl->isl_parameters.isl_ioctl.isl_obuflen = olen;
952 switch(IO_METHOD(iocode)) {
953 case METHOD_BUFFERED:
959 ip->irp_assoc.irp_sysbuf =
960 ExAllocatePoolWithTag(NonPagedPool, buflen, 0);
961 if (ip->irp_assoc.irp_sysbuf == NULL) {
966 if (ilen && ibuf != NULL) {
967 bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen);
968 bzero((char *)ip->irp_assoc.irp_sysbuf + ilen,
971 bzero(ip->irp_assoc.irp_sysbuf, ilen);
972 ip->irp_userbuf = obuf;
974 case METHOD_IN_DIRECT:
975 case METHOD_OUT_DIRECT:
976 if (ilen && ibuf != NULL) {
977 ip->irp_assoc.irp_sysbuf =
978 ExAllocatePoolWithTag(NonPagedPool, ilen, 0);
979 if (ip->irp_assoc.irp_sysbuf == NULL) {
983 bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen);
985 if (olen && obuf != NULL) {
986 ip->irp_mdl = IoAllocateMdl(obuf, olen,
989 * Normally we would MmProbeAndLockPages()
990 * here, but we don't have to in our
996 ip->irp_userbuf = obuf;
997 sl->isl_parameters.isl_ioctl.isl_type3ibuf = ibuf;
1004 * Ideally, we should associate this IRP with the calling
1012 IoAllocateIrp(uint8_t stsize, uint8_t chargequota)
1016 i = ExAllocatePoolWithTag(NonPagedPool, IoSizeOfIrp(stsize), 0);
1020 IoInitializeIrp(i, IoSizeOfIrp(stsize), stsize);
1026 IoMakeAssociatedIrp(irp *ip, uint8_t stsize)
1030 associrp = IoAllocateIrp(stsize, FALSE);
1031 if (associrp == NULL)
1034 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
1035 associrp->irp_flags |= IRP_ASSOCIATED_IRP;
1036 associrp->irp_tail.irp_overlay.irp_thread =
1037 ip->irp_tail.irp_overlay.irp_thread;
1038 associrp->irp_assoc.irp_master = ip;
1039 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1051 IoInitializeIrp(irp *io, uint16_t psize, uint8_t ssize)
1053 bzero((char *)io, IoSizeOfIrp(ssize));
1054 io->irp_size = psize;
1055 io->irp_stackcnt = ssize;
1056 io->irp_currentstackloc = ssize;
1057 InitializeListHead(&io->irp_thlist);
1058 io->irp_tail.irp_overlay.irp_csl =
1059 (io_stack_location *)(io + 1) + ssize;
1063 IoReuseIrp(irp *ip, uint32_t status)
1067 allocflags = ip->irp_allocflags;
1068 IoInitializeIrp(ip, ip->irp_size, ip->irp_stackcnt);
1069 ip->irp_iostat.isb_status = status;
1070 ip->irp_allocflags = allocflags;
1074 IoAcquireCancelSpinLock(uint8_t *irql)
1076 KeAcquireSpinLock(&ntoskrnl_cancellock, irql);
1080 IoReleaseCancelSpinLock(uint8_t irql)
1082 KeReleaseSpinLock(&ntoskrnl_cancellock, irql);
1086 IoCancelIrp(irp *ip)
1091 IoAcquireCancelSpinLock(&cancelirql);
1092 cfunc = IoSetCancelRoutine(ip, NULL);
1093 ip->irp_cancel = TRUE;
1094 if (cfunc == NULL) {
1095 IoReleaseCancelSpinLock(cancelirql);
1098 ip->irp_cancelirql = cancelirql;
1099 MSCALL2(cfunc, IoGetCurrentIrpStackLocation(ip)->isl_devobj, ip);
1100 return (uint8_t)IoSetCancelValue(ip, TRUE);
1104 IofCallDriver(device_object *dobj, irp *ip)
1106 driver_object *drvobj;
1107 io_stack_location *sl;
1109 driver_dispatch disp;
1111 drvobj = dobj->do_drvobj;
1113 if (ip->irp_currentstackloc <= 0)
1114 panic("IoCallDriver(): out of stack locations");
1116 IoSetNextIrpStackLocation(ip);
1117 sl = IoGetCurrentIrpStackLocation(ip);
1119 sl->isl_devobj = dobj;
1121 disp = drvobj->dro_dispatch[sl->isl_major];
1122 status = MSCALL2(disp, dobj, ip);
1128 IofCompleteRequest(irp *ip, uint8_t prioboost)
1131 device_object *dobj;
1132 io_stack_location *sl;
1135 KASSERT(ip->irp_iostat.isb_status != STATUS_PENDING,
1136 ("incorrect IRP(%p) status (STATUS_PENDING)", ip));
1138 sl = IoGetCurrentIrpStackLocation(ip);
1139 IoSkipCurrentIrpStackLocation(ip);
1142 if (sl->isl_ctl & SL_PENDING_RETURNED)
1143 ip->irp_pendingreturned = TRUE;
1145 if (ip->irp_currentstackloc != (ip->irp_stackcnt + 1))
1146 dobj = IoGetCurrentIrpStackLocation(ip)->isl_devobj;
1150 if (sl->isl_completionfunc != NULL &&
1151 ((ip->irp_iostat.isb_status == STATUS_SUCCESS &&
1152 sl->isl_ctl & SL_INVOKE_ON_SUCCESS) ||
1153 (ip->irp_iostat.isb_status != STATUS_SUCCESS &&
1154 sl->isl_ctl & SL_INVOKE_ON_ERROR) ||
1155 (ip->irp_cancel == TRUE &&
1156 sl->isl_ctl & SL_INVOKE_ON_CANCEL))) {
1157 cf = sl->isl_completionfunc;
1158 status = MSCALL3(cf, dobj, ip, sl->isl_completionctx);
1159 if (status == STATUS_MORE_PROCESSING_REQUIRED)
1162 if ((ip->irp_currentstackloc <= ip->irp_stackcnt) &&
1163 (ip->irp_pendingreturned == TRUE))
1164 IoMarkIrpPending(ip);
1167 /* move to the next. */
1168 IoSkipCurrentIrpStackLocation(ip);
1170 } while (ip->irp_currentstackloc <= (ip->irp_stackcnt + 1));
1172 if (ip->irp_usriostat != NULL)
1173 *ip->irp_usriostat = ip->irp_iostat;
1174 if (ip->irp_usrevent != NULL)
1175 KeSetEvent(ip->irp_usrevent, prioboost, FALSE);
1177 /* Handle any associated IRPs. */
1179 if (ip->irp_flags & IRP_ASSOCIATED_IRP) {
1180 uint32_t masterirpcnt;
1184 masterirp = ip->irp_assoc.irp_master;
1186 InterlockedDecrement(&masterirp->irp_assoc.irp_irpcnt);
1188 while ((m = ip->irp_mdl) != NULL) {
1189 ip->irp_mdl = m->mdl_next;
1193 if (masterirpcnt == 0)
1194 IoCompleteRequest(masterirp, IO_NO_INCREMENT);
1198 /* With any luck, these conditions will never arise. */
1200 if (ip->irp_flags & IRP_PAGING_IO) {
1201 if (ip->irp_mdl != NULL)
1202 IoFreeMdl(ip->irp_mdl);
1208 ntoskrnl_intr(void *arg)
1215 KeAcquireSpinLock(&ntoskrnl_intlock, &irql);
1216 l = ntoskrnl_intlist.nle_flink;
1217 while (l != &ntoskrnl_intlist) {
1218 iobj = CONTAINING_RECORD(l, kinterrupt, ki_list);
1219 claimed = MSCALL2(iobj->ki_svcfunc, iobj, iobj->ki_svcctx);
1220 if (claimed == TRUE)
1224 KeReleaseSpinLock(&ntoskrnl_intlock, irql);
1228 KeAcquireInterruptSpinLock(kinterrupt *iobj)
1231 KeAcquireSpinLock(&ntoskrnl_intlock, &irql);
1236 KeReleaseInterruptSpinLock(kinterrupt *iobj, uint8_t irql)
1238 KeReleaseSpinLock(&ntoskrnl_intlock, irql);
1242 KeSynchronizeExecution(kinterrupt *iobj, void *syncfunc, void *syncctx)
1246 KeAcquireSpinLock(&ntoskrnl_intlock, &irql);
1247 MSCALL1(syncfunc, syncctx);
1248 KeReleaseSpinLock(&ntoskrnl_intlock, irql);
1254 * IoConnectInterrupt() is passed only the interrupt vector and
1255 * irql that a device wants to use, but no device-specific tag
1256 * of any kind. This conflicts rather badly with FreeBSD's
1257 * bus_setup_intr(), which needs the device_t for the device
1258 * requesting interrupt delivery. In order to bypass this
1259 * inconsistency, we implement a second level of interrupt
1260 * dispatching on top of bus_setup_intr(). All devices use
1261 * ntoskrnl_intr() as their ISR, and any device requesting
1262 * interrupts will be registered with ntoskrnl_intr()'s interrupt
1263 * dispatch list. When an interrupt arrives, we walk the list
1264 * and invoke all the registered ISRs. This effectively makes all
1265 * interrupts shared, but it's the only way to duplicate the
1266 * semantics of IoConnectInterrupt() and IoDisconnectInterrupt() properly.
1270 IoConnectInterrupt(kinterrupt **iobj, void *svcfunc, void *svcctx,
1271 kspin_lock *lock, uint32_t vector, uint8_t irql, uint8_t syncirql,
1272 uint8_t imode, uint8_t shared, uint32_t affinity, uint8_t savefloat)
1276 *iobj = ExAllocatePoolWithTag(NonPagedPool, sizeof(kinterrupt), 0);
1278 return (STATUS_INSUFFICIENT_RESOURCES);
1280 (*iobj)->ki_svcfunc = svcfunc;
1281 (*iobj)->ki_svcctx = svcctx;
1284 KeInitializeSpinLock(&(*iobj)->ki_lock_priv);
1285 (*iobj)->ki_lock = &(*iobj)->ki_lock_priv;
1287 (*iobj)->ki_lock = lock;
1289 KeAcquireSpinLock(&ntoskrnl_intlock, &curirql);
1290 InsertHeadList((&ntoskrnl_intlist), (&(*iobj)->ki_list));
1291 KeReleaseSpinLock(&ntoskrnl_intlock, curirql);
1293 return (STATUS_SUCCESS);
1297 IoDisconnectInterrupt(kinterrupt *iobj)
1304 KeAcquireSpinLock(&ntoskrnl_intlock, &irql);
1305 RemoveEntryList((&iobj->ki_list));
1306 KeReleaseSpinLock(&ntoskrnl_intlock, irql);
1312 IoAttachDeviceToDeviceStack(device_object *src, device_object *dst)
1314 device_object *attached;
1316 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
1317 attached = IoGetAttachedDevice(dst);
1318 attached->do_attacheddev = src;
1319 src->do_attacheddev = NULL;
1320 src->do_stacksize = attached->do_stacksize + 1;
1321 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/*
 * Detach the device immediately below 'topdev' from the device stack:
 * unlink it, drop topdev's reference count, then walk the remaining
 * chain decrementing each object's stack size.  Runs under the
 * dispatcher lock; the early LK_RELEASE visible below is presumably an
 * empty-chain bail-out path (the guarding 'if' falls in a gap of this
 * extract — TODO confirm).
 */
1327 IoDetachDevice(device_object *topdev)
1329 device_object *tail;
1331 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
1333 /* First, break the chain. */
1334 tail = topdev->do_attacheddev;
1336 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1339 topdev->do_attacheddev = tail->do_attacheddev;
1340 topdev->do_refcnt--;
1342 /* Now reduce the stacksize count for the tail objects. */
1344 tail = topdev->do_attacheddev;
1345 while (tail != NULL) {
1346 tail->do_stacksize--;
1347 tail = tail->do_attacheddev;
1350 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1354 * For the most part, an object is considered signalled if
1355 * dh_sigstate == TRUE. The exception is for mutant objects
1356 * (mutexes), where the logic works like this:
1358 * - If the thread already owns the object and sigstate is
1359 * less than or equal to 0, then the object is considered
1360 * signalled (recursive acquisition).
1361 * - If dh_sigstate == 1, the object is also considered
/*
 * Decide whether a dispatcher object is in the signalled state for
 * thread 'td'.  Mutants (mutexes) are special: a non-positive sigstate
 * still counts as signalled when 'td' already owns the mutex
 * (recursive acquisition); otherwise sigstate == 1 signals.  All other
 * object types are signalled when sigstate > 0.
 */
1366 ntoskrnl_is_signalled(nt_dispatch_header *obj, struct thread *td)
1370 if (obj->dh_type == DISP_TYPE_MUTANT) {
1371 km = (kmutant *)obj;
1372 if ((obj->dh_sigstate <= 0 && km->km_ownerthread == td) ||
1373 obj->dh_sigstate == 1)
1378 if (obj->dh_sigstate > 0)
/*
 * Consume a satisfied wait on 'obj' for thread 'td'.  Mutants record the
 * new owner (and clear the abandoned flag) once their sigstate drops to
 * zero; synchronization events/timers auto-reset to non-signalled;
 * semaphores presumably decrement their count in the lines elided from
 * this extract — TODO confirm.  The sigstate decrement itself falls in
 * a gap before the switch.
 */
1384 ntoskrnl_satisfy_wait(nt_dispatch_header *obj, struct thread *td)
1388 switch (obj->dh_type) {
1389 case DISP_TYPE_MUTANT:
1390 km = (struct kmutant *)obj;
1393 * If sigstate reaches 0, the mutex is now
1394 * non-signalled (the new thread owns it).
1396 if (obj->dh_sigstate == 0) {
1397 km->km_ownerthread = td;
1398 if (km->km_abandoned == TRUE)
1399 km->km_abandoned = FALSE;
1402 /* Synchronization objects get reset to unsignalled. */
1403 case DISP_TYPE_SYNCHRONIZATION_EVENT:
1404 case DISP_TYPE_SYNCHRONIZATION_TIMER:
1405 obj->dh_sigstate = 0;
1407 case DISP_TYPE_SEMAPHORE:
/*
 * Satisfy every wait in a circular wait-block list (a thread sleeping in
 * KeWaitForMultipleObjects() with WAITTYPE_ALL): walk the ring starting
 * at 'wb', satisfying each object's wait for the owning thread and
 * marking each block awakened.
 */
1416 ntoskrnl_satisfy_multiple_waits(wait_block *wb)
1422 td = wb->wb_kthread;
1425 ntoskrnl_satisfy_wait(wb->wb_object, td);
1426 cur->wb_awakened = TRUE;
1428 } while (cur != wb);
1431 /* Always called with dispatcher lock held. */
/*
 * Wake threads waiting on a freshly signalled dispatcher object.
 * Walks the object's wait list while it remains signalled, satisfying
 * WAITTYPE_ANY blocks immediately and checking WAITTYPE_ALL rings for
 * full signalled-ness before satisfying the whole ring.  The awakened
 * thread's priority is boosted by 'increment * 4', clamped at
 * PRI_MIN_KERN, via cv_broadcastpri().
 */
1433 ntoskrnl_waittest(nt_dispatch_header *obj, uint32_t increment)
1435 wait_block *w, *next;
1442 * Once an object has been signalled, we walk its list of
1443 * wait blocks. If a wait block can be awakened, then satisfy
1444 * waits as necessary and wake the thread.
1446 * The rules work like this:
1448 * If a wait block is marked as WAITTYPE_ANY, then
1449 * we can satisfy the wait conditions on the current
1450 * object and wake the thread right away. Satisfying
1451 * the wait also has the effect of breaking us out
1452 * of the search loop.
1454 * If the object is marked as WAITTYPE_ALL, then the
1455 * wait block will be part of a circularly linked
1456 * list of wait blocks belonging to a waiting thread
1457 * that's sleeping in KeWaitForMultipleObjects(). In
1458 * order to wake the thread, all the objects in the
1459 * wait list must be in the signalled state. If they
1460 * are, we then satisfy all of them and wake the
1465 e = obj->dh_waitlisthead.nle_flink;
1467 while (e != &obj->dh_waitlisthead && obj->dh_sigstate > 0) {
1468 w = CONTAINING_RECORD(e, wait_block, wb_waitlist);
1472 if (w->wb_waittype == WAITTYPE_ANY) {
1474 * Thread can be awakened if
1475 * any wait is satisfied.
1477 ntoskrnl_satisfy_wait(obj, td);
1479 w->wb_awakened = TRUE;
1482 * Thread can only be woken up
1483 * if all waits are satisfied.
1484 * If the thread is waiting on multiple
1485 * objects, they should all be linked
1486 * through the wb_next pointers in the
/* Any unsignalled sibling vetoes the WAITTYPE_ALL wakeup. */
1492 if (ntoskrnl_is_signalled(obj, td) == FALSE) {
1496 next = next->wb_next;
1498 ntoskrnl_satisfy_multiple_waits(w);
1501 if (satisfied == TRUE)
1502 cv_broadcastpri(&we->we_cv,
1503 (w->wb_oldpri - (increment * 4)) > PRI_MIN_KERN ?
1504 w->wb_oldpri - (increment * 4) : PRI_MIN_KERN);
1511 * Return the number of 100 nanosecond intervals since
1512 * January 1, 1601. (?!?!)
/*
 * Convert the current wall-clock time (Unix epoch) into Windows
 * FILETIME units: 100 ns ticks since 1601-01-01.  11644473600 is the
 * number of seconds between the 1601 and 1970 epochs.
 */
1515 ntoskrnl_time(uint64_t *tval)
1520 *tval = (uint64_t)ts.tv_nsec / 100 + (uint64_t)ts.tv_sec * 10000000 +
1521 11644473600 * 10000000; /* 100ns ticks from 1601 to 1970 */
/* Windows API wrapper: report system time in FILETIME (100 ns) units. */
1525 KeQuerySystemTime(uint64_t *current_time)
1527 ntoskrnl_time(current_time);
1534 getmicrouptime(&tv);
1535 return tvtohz_high(&tv);
1540 * KeWaitForSingleObject() is a tricky beast, because it can be used
1541 * with several different object types: semaphores, timers, events,
1542 * mutexes and threads. Semaphores don't appear very often, but the
1543 * other object types are quite common. KeWaitForSingleObject() is
1544 * what's normally used to acquire a mutex, and it can be used to
1545 * wait for a thread termination.
1547 * The Windows NDIS API is implemented in terms of Windows kernel
1548 * primitives, and some of the object manipulation is duplicated in
1549 * NDIS. For example, NDIS has timers and events, which are actually
1550 * Windows kevents and ktimers. Now, you're supposed to only use the
1551 * NDIS variants of these objects within the confines of the NDIS API,
1552 * but there are some naughty developers out there who will use
1553 * KeWaitForSingleObject() on NDIS timer and event objects, so we
1554 * have to support that as well. Consequently, our NDIS timer and event
1555 * code has to be closely tied into our ntoskrnl timer and event code,
1556 * just as it is in Windows.
1558 * KeWaitForSingleObject() may do different things for different kinds
1561 * - For events, we check if the event has been signalled. If the
1562 * event is already in the signalled state, we just return immediately,
1563 * otherwise we wait for it to be set to the signalled state by someone
1564 * else calling KeSetEvent(). Events can be either synchronization or
1565 * notification events.
1567 * - For timers, if the timer has already fired and the timer is in
1568 * the signalled state, we just return, otherwise we wait on the
1569 * timer. Unlike an event, timers get signalled automatically when
1570 * they expire rather than someone having to trip them manually.
1571 * Timers initialized with KeInitializeTimer() are always notification
1572 * events: KeInitializeTimerEx() lets you initialize a timer as
1573 * either a notification or synchronization event.
1575 * - For mutexes, we try to acquire the mutex and if we can't, we wait
1576 * on the mutex until it's available and then grab it. When a mutex is
1577 * released, it enters the signalled state, which wakes up one of the
1578 * threads waiting to acquire it. Mutexes are always synchronization
1581 * - For threads, the only thing we do is wait until the thread object
1582 * enters a signalled state, which occurs when the thread terminates.
1583 * Threads are always notification events.
1585 * A notification event wakes up all threads waiting on an object. A
1586 * synchronization event wakes up just one. Also, a synchronization event
1587 * is auto-clearing, which means we automatically set the event back to
1588 * the non-signalled state once the wakeup is done.
/*
 * Wait for a single dispatcher object (event, timer, mutex, semaphore or
 * thread) to become signalled.  If already signalled, the wait is
 * satisfied immediately; otherwise a wait block is queued on the object
 * and the thread sleeps on a condvar under the dispatcher lock, with an
 * optional FILETIME-style timeout (*duetime: negative = relative 100 ns
 * units, positive = absolute since 1601).  Returns STATUS_SUCCESS,
 * STATUS_TIMEOUT, or STATUS_INVALID_PARAMETER for a NULL object.
 */
1592 KeWaitForSingleObject(void *arg, uint32_t reason, uint32_t mode,
1593 uint8_t alertable, int64_t *duetime)
1596 struct thread *td = curthread;
1601 nt_dispatch_header *obj;
1606 return (STATUS_INVALID_PARAMETER);
1608 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
1610 cv_init(&we.we_cv, "KeWFS");
1614 * Check to see if this object is already signalled,
1615 * and just return without waiting if it is.
1617 if (ntoskrnl_is_signalled(obj, td) == TRUE) {
1618 /* Sanity check the signal state value. */
1619 if (obj->dh_sigstate != INT32_MIN) {
1620 ntoskrnl_satisfy_wait(obj, curthread);
1621 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1622 return (STATUS_SUCCESS);
1625 * There's a limit to how many times we can
1626 * recursively acquire a mutant. If we hit
1627 * the limit, something is very wrong.
1629 if (obj->dh_type == DISP_TYPE_MUTANT) {
1630 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1631 panic("mutant limit exceeded");
/* Queue our wait block on the object's wait list. */
1636 bzero((char *)&w, sizeof(wait_block));
1639 w.wb_waittype = WAITTYPE_ANY;
1642 w.wb_awakened = FALSE;
1643 w.wb_oldpri = td->td_pri;
1645 InsertTailList((&obj->dh_waitlisthead), (&w.wb_waitlist));
1648 * The timeout value is specified in 100 nanosecond units
1649 * and can be a positive or negative number. If it's positive,
1650 * then the duetime is absolute, and we need to convert it
1651 * to an absolute offset relative to now in order to use it.
1652 * If it's negative, then the duetime is relative and we
1653 * just have to convert the units.
1656 if (duetime != NULL) {
1658 tv.tv_sec = - (*duetime) / 10000000;
1659 tv.tv_usec = (- (*duetime) / 10) -
1660 (tv.tv_sec * 1000000);
1662 ntoskrnl_time(&curtime);
1663 if (*duetime < curtime)
1664 tv.tv_sec = tv.tv_usec = 0;
1666 tv.tv_sec = ((*duetime) - curtime) / 10000000;
1667 tv.tv_usec = ((*duetime) - curtime) / 10 -
1668 (tv.tv_sec * 1000000);
1673 if (duetime == NULL)
1674 cv_wait(&we.we_cv, &ntoskrnl_dispatchlock);
1676 error = cv_timedwait(&we.we_cv,
1677 &ntoskrnl_dispatchlock, tvtohz_high(&tv));
1679 RemoveEntryList(&w.wb_waitlist);
1681 cv_destroy(&we.we_cv);
1683 /* We timed out. Leave the object alone and return status. */
1685 if (error == EWOULDBLOCK) {
1686 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1687 return (STATUS_TIMEOUT);
1690 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1692 return (STATUS_SUCCESS);
/*
 * NOTE(review): this tail call appears to be an alternate
 * implementation delegating to KeWaitForMultipleObjects() with a
 * one-object array; the surrounding context falls in a gap of this
 * extract — confirm against the full source.
 */
1694 return (KeWaitForMultipleObjects(1, &obj, WAITTYPE_ALL, reason,
1695 mode, alertable, duetime, &w));
/*
 * Wait on up to MAX_WAIT_OBJECTS dispatcher objects, either until any
 * one is signalled (WAITTYPE_ANY) or until all are (WAITTYPE_ALL).
 * Callers waiting on more than THREAD_WAIT_OBJECTS objects must supply
 * their own wait-block array; otherwise an on-stack array is used.
 * Returns STATUS_WAIT_0 + index for ANY waits, STATUS_SUCCESS for ALL
 * waits, STATUS_TIMEOUT on expiry, or STATUS_INVALID_PARAMETER.
 * The duetime encoding matches KeWaitForSingleObject (100 ns FILETIME
 * units, negative = relative).
 */
1700 KeWaitForMultipleObjects(uint32_t cnt, nt_dispatch_header *obj[], uint32_t wtype,
1701 uint32_t reason, uint32_t mode, uint8_t alertable, int64_t *duetime,
1702 wait_block *wb_array)
1704 struct thread *td = curthread;
1705 wait_block *whead, *w;
1706 wait_block _wb_array[MAX_WAIT_OBJECTS];
1707 nt_dispatch_header *cur;
1709 int i, wcnt = 0, error = 0;
1711 struct timespec t1, t2;
1712 uint32_t status = STATUS_SUCCESS;
1715 if (cnt > MAX_WAIT_OBJECTS)
1716 return (STATUS_INVALID_PARAMETER);
1717 if (cnt > THREAD_WAIT_OBJECTS && wb_array == NULL)
1718 return (STATUS_INVALID_PARAMETER);
1720 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
1722 cv_init(&we.we_cv, "KeWFM");
1725 if (wb_array == NULL)
1730 bzero((char *)whead, sizeof(wait_block) * cnt);
1732 /* First pass: see if we can satisfy any waits immediately. */
1737 for (i = 0; i < cnt; i++) {
1738 InsertTailList((&obj[i]->dh_waitlisthead),
1741 w->wb_object = obj[i];
1742 w->wb_waittype = wtype;
1744 w->wb_awakened = FALSE;
1745 w->wb_oldpri = td->td_pri;
1749 if (ntoskrnl_is_signalled(obj[i], td)) {
1751 * There's a limit to how many times
1752 * we can recursively acquire a mutant.
1753 * If we hit the limit, something
1756 if (obj[i]->dh_sigstate == INT32_MIN &&
1757 obj[i]->dh_type == DISP_TYPE_MUTANT) {
1758 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1759 panic("mutant limit exceeded");
1763 * If this is a WAITTYPE_ANY wait, then
1764 * satisfy the waited object and exit
1768 if (wtype == WAITTYPE_ANY) {
1769 ntoskrnl_satisfy_wait(obj[i], td);
1770 status = STATUS_WAIT_0 + i;
/* Already signalled: drop this block from the object's list. */
1775 w->wb_object = NULL;
1776 RemoveEntryList(&w->wb_waitlist);
1782 * If this is a WAITTYPE_ALL wait and all objects are
1783 * already signalled, satisfy the waits and exit now.
1786 if (wtype == WAITTYPE_ALL && wcnt == 0) {
1787 for (i = 0; i < cnt; i++)
1788 ntoskrnl_satisfy_wait(obj[i], td);
1789 status = STATUS_SUCCESS;
1794 * Create a circular waitblock list. The waitcount
1795 * must always be non-zero when we get here.
1798 (w - 1)->wb_next = whead;
1800 /* Wait on any objects that aren't yet signalled. */
1802 /* Calculate timeout, if any. */
1804 if (duetime != NULL) {
1806 tv.tv_sec = - (*duetime) / 10000000;
1807 tv.tv_usec = (- (*duetime) / 10) -
1808 (tv.tv_sec * 1000000);
1810 ntoskrnl_time(&curtime);
1811 if (*duetime < curtime)
1812 tv.tv_sec = tv.tv_usec = 0;
1814 tv.tv_sec = ((*duetime) - curtime) / 10000000;
1815 tv.tv_usec = ((*duetime) - curtime) / 10 -
1816 (tv.tv_sec * 1000000);
1824 if (duetime == NULL)
1825 cv_wait(&we.we_cv, &ntoskrnl_dispatchlock);
1827 error = cv_timedwait(&we.we_cv,
1828 &ntoskrnl_dispatchlock, tvtohz_high(&tv));
1830 /* Wait with timeout expired. */
1833 status = STATUS_TIMEOUT;
1839 /* See what's been signalled. */
1844 if (ntoskrnl_is_signalled(cur, td) == TRUE ||
1845 w->wb_awakened == TRUE) {
1846 /* Sanity check the signal state value. */
1847 if (cur->dh_sigstate == INT32_MIN &&
1848 cur->dh_type == DISP_TYPE_MUTANT) {
1849 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
1850 panic("mutant limit exceeded");
1853 if (wtype == WAITTYPE_ANY) {
1854 status = w->wb_waitkey &
1860 } while (w != whead);
1863 * If all objects have been signalled, or if this
1864 * is a WAITTYPE_ANY wait and we were woken up by
1865 * someone, we can bail.
1869 status = STATUS_SUCCESS;
1874 * If this is WAITTYPE_ALL wait, and there's still
1875 * objects that haven't been signalled, deduct the
1876 * time that's elapsed so far from the timeout and
1877 * wait again (or continue waiting indefinitely if
1878 * there's no timeout).
1881 if (duetime != NULL) {
1882 tv.tv_sec -= (t2.tv_sec - t1.tv_sec);
1883 tv.tv_usec -= (t2.tv_nsec - t1.tv_nsec) / 1000;
/* Tear down: unhook any wait blocks still on object lists. */
1890 cv_destroy(&we.we_cv);
1892 for (i = 0; i < cnt; i++) {
1893 if (whead[i].wb_object != NULL)
1894 RemoveEntryList(&whead[i].wb_waitlist);
1897 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/* Write a 16-bit value to a memory-mapped device register. */
1903 WRITE_REGISTER_USHORT(uint16_t *reg, uint16_t val)
1905 bus_space_write_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val);
/* Read a 16-bit value from a memory-mapped device register. */
1909 READ_REGISTER_USHORT(uint16_t *reg)
1911 return (bus_space_read_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg));
/* Write a 32-bit value to a memory-mapped device register. */
1915 WRITE_REGISTER_ULONG(uint32_t *reg, uint32_t val)
1917 bus_space_write_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val);
/* Read a 32-bit value from a memory-mapped device register. */
1921 READ_REGISTER_ULONG(uint32_t *reg)
1923 return (bus_space_read_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg));
/* Read an 8-bit value from a memory-mapped device register. */
1927 READ_REGISTER_UCHAR(uint8_t *reg)
1929 return (bus_space_read_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg));
/* Write an 8-bit value to a memory-mapped device register. */
1933 WRITE_REGISTER_UCHAR(uint8_t *reg, uint8_t val)
1935 bus_space_write_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val);
1939 _allmul(int64_t a, int64_t b)
1945 _alldiv(int64_t a, int64_t b)
1951 _allrem(int64_t a, int64_t b)
1957 _aullmul(uint64_t a, uint64_t b)
1963 _aulldiv(uint64_t a, uint64_t b)
1969 _aullrem(uint64_t a, uint64_t b)
1975 _allshl(int64_t a, uint8_t b)
1981 _aullshl(uint64_t a, uint8_t b)
1987 _allshr(int64_t a, uint8_t b)
1993 _aullshr(uint64_t a, uint8_t b)
/*
 * Push 'entry' onto the front of a sequenced singly-linked list,
 * updating the depth counter and sequence number.  Caller provides
 * any locking; returns the previous head (visible return falls in a
 * gap of this extract).
 */
1998 static slist_entry *
1999 ntoskrnl_pushsl(slist_header *head, slist_entry *entry)
2001 slist_entry *oldhead;
2003 oldhead = head->slh_list.slh_next;
2004 entry->sl_next = head->slh_list.slh_next;
2005 head->slh_list.slh_next = entry;
2006 head->slh_list.slh_depth++;
2007 head->slh_list.slh_seq++;
/* Zero out an SList header: empty list, depth 0, sequence 0. */
2013 InitializeSListHead(slist_header *head)
2015 memset(head, 0, sizeof(*head));
/*
 * Pop the first entry off a sequenced singly-linked list, adjusting
 * depth and sequence number.  Returns the removed entry, or NULL when
 * the list is empty.  Caller provides any locking.
 */
2018 static slist_entry *
2019 ntoskrnl_popsl(slist_header *head)
2023 first = head->slh_list.slh_next;
2024 if (first != NULL) {
2025 head->slh_list.slh_next = first->sl_next;
2026 head->slh_list.slh_depth--;
2027 head->slh_list.slh_seq++;
2034 * We need this to make lookaside lists work for amd64.
2035 * We pass a pointer to ExAllocatePoolWithTag() the lookaside
2036 * list structure. For amd64 to work right, this has to be a
2037 * pointer to the wrapped version of the routine, not the
2038 * original. Letting the Windows driver invoke the original
2039 * function directly will result in a convention calling
2040 * mismatch and a pretty crash. On x86, this effectively
2041 * becomes a no-op since ipt_func and ipt_wrap are the same.
/*
 * Look up the Windows-calling-convention wrapper for a native function
 * by scanning the ntoskrnl patch table; returns the wrapped pointer, so
 * Windows drivers invoking it (e.g. via lookaside alloc/free hooks) use
 * the correct calling convention on amd64.
 */
2045 ntoskrnl_findwrap(funcptr func)
2047 image_patch_table *patch;
2049 patch = ntoskrnl_functbl;
2050 while (patch->ipt_func != NULL) {
2051 if ((funcptr)patch->ipt_func == func)
2052 return ((funcptr)patch->ipt_wrap);
/*
 * Initialize a paged lookaside list: record element size (at least one
 * slist_entry so freed elements can be chained), tag, depth, and the
 * alloc/free callbacks, defaulting to wrapped ExAllocatePoolWithTag /
 * ExFreePool when the caller passes NULL.  Note the pool type is set to
 * NonPagedPool here despite the "paged" name, mirroring the non-paged
 * variant below.
 */
2060 ExInitializePagedLookasideList(paged_lookaside_list *lookaside,
2061 lookaside_alloc_func *allocfunc, lookaside_free_func *freefunc,
2062 uint32_t flags, size_t size, uint32_t tag, uint16_t depth)
2064 bzero((char *)lookaside, sizeof(paged_lookaside_list));
2066 if (size < sizeof(slist_entry))
2067 lookaside->nll_l.gl_size = sizeof(slist_entry);
2069 lookaside->nll_l.gl_size = size;
2070 lookaside->nll_l.gl_tag = tag;
2071 if (allocfunc == NULL)
2072 lookaside->nll_l.gl_allocfunc =
2073 ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag);
2075 lookaside->nll_l.gl_allocfunc = allocfunc;
2077 if (freefunc == NULL)
2078 lookaside->nll_l.gl_freefunc =
2079 ntoskrnl_findwrap((funcptr)ExFreePool);
2081 lookaside->nll_l.gl_freefunc = freefunc;
2083 lookaside->nll_l.gl_type = NonPagedPool;
2084 lookaside->nll_l.gl_depth = depth;
2085 lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH;
/*
 * Drain a paged lookaside list, releasing every cached buffer through
 * the list's free callback (invoked with Windows calling convention).
 */
2089 ExDeletePagedLookasideList(paged_lookaside_list *lookaside)
2092 void (*freefunc)(void *);
2094 freefunc = lookaside->nll_l.gl_freefunc;
2095 while((buf = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead)) != NULL)
2096 MSCALL1(freefunc, buf);
/*
 * Initialize a non-paged lookaside list; identical logic to the paged
 * variant above: minimum element size of one slist_entry, default
 * alloc/free callbacks routed through their calling-convention wrappers.
 */
2100 ExInitializeNPagedLookasideList(npaged_lookaside_list *lookaside,
2101 lookaside_alloc_func *allocfunc, lookaside_free_func *freefunc,
2102 uint32_t flags, size_t size, uint32_t tag, uint16_t depth)
2104 bzero((char *)lookaside, sizeof(npaged_lookaside_list));
2106 if (size < sizeof(slist_entry))
2107 lookaside->nll_l.gl_size = sizeof(slist_entry);
2109 lookaside->nll_l.gl_size = size;
2110 lookaside->nll_l.gl_tag = tag;
2111 if (allocfunc == NULL)
2112 lookaside->nll_l.gl_allocfunc =
2113 ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag);
2115 lookaside->nll_l.gl_allocfunc = allocfunc;
2117 if (freefunc == NULL)
2118 lookaside->nll_l.gl_freefunc =
2119 ntoskrnl_findwrap((funcptr)ExFreePool);
2121 lookaside->nll_l.gl_freefunc = freefunc;
2123 lookaside->nll_l.gl_type = NonPagedPool;
2124 lookaside->nll_l.gl_depth = depth;
2125 lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH;
/*
 * Drain a non-paged lookaside list, freeing every cached buffer via the
 * list's free callback (Windows calling convention).
 */
2129 ExDeleteNPagedLookasideList(npaged_lookaside_list *lookaside)
2132 void (*freefunc)(void *);
2134 freefunc = lookaside->nll_l.gl_freefunc;
2135 while((buf = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead)) != NULL)
2136 MSCALL1(freefunc, buf);
/*
 * Atomically push an entry onto an SList by wrapping the unlocked push
 * in the global ntoskrnl interlock spinlock.
 */
2140 InterlockedPushEntrySList(slist_header *head, slist_entry *entry)
2142 slist_entry *oldhead;
2144 mtx_spinlock(&ntoskrnl_interlock);
2145 oldhead = ntoskrnl_pushsl(head, entry);
2146 mtx_spinunlock(&ntoskrnl_interlock);
/* Atomically pop the first SList entry under the global interlock. */
2152 InterlockedPopEntrySList(slist_header *head)
2156 mtx_spinlock(&ntoskrnl_interlock);
2157 first = ntoskrnl_popsl(head);
2158 mtx_spinunlock(&ntoskrnl_interlock);
/*
 * Ex-variant of the SList push: the caller-supplied spinlock is ignored
 * and the global-interlock implementation is used instead.
 */
2163 static slist_entry *
2164 ExInterlockedPushEntrySList(slist_header *head, slist_entry *entry,
2167 return (InterlockedPushEntrySList(head, entry));
/* Ex-variant of the SList pop; the 'lock' argument is ignored. */
2170 static slist_entry *
2171 ExInterlockedPopEntrySList(slist_header *head, kspin_lock *lock)
2173 return (InterlockedPopEntrySList(head))
/* Return the current SList depth, sampled under the global interlock. */
2177 ExQueryDepthSList(slist_header *head)
2181 mtx_spinlock(&ntoskrnl_interlock);
2182 depth = head->slh_list.slh_depth;
2183 mtx_spinunlock(&ntoskrnl_interlock);
2189 KeInitializeSpinLock(kspin_lock *lock)
/*
 * Spin until the lock word is atomically changed from 0 to 1
 * (acquire semantics); the Windows DPC-level spinlock acquire.
 */
2195 KeAcquireSpinLockAtDpcLevel(kspin_lock *lock)
2197 while (atomic_cmpset_acq_int((volatile u_int *)lock, 0, 1) == 0)
/* Release the spinlock with an atomic store (release semantics). */
2202 KeReleaseSpinLockFromDpcLevel(kspin_lock *lock)
2204 atomic_store_rel_int((volatile u_int *)lock, 0);
/*
 * Atomically swap *dst with 'val' under the global interlock; the old
 * value is returned (the read/write lines fall in a gap of this
 * extract).
 */
2208 InterlockedExchange(volatile uint32_t *dst, uintptr_t val)
2212 mtx_spinlock(&ntoskrnl_interlock);
2215 mtx_spinunlock(&ntoskrnl_interlock);
/*
 * Atomically increment *addend.  NOTE(review): the u_long cast on a
 * uint32_t target looks suspect on LP64 (would touch 8 bytes) — confirm
 * against the platform's atomic_add_long definition.
 */
2221 InterlockedIncrement(volatile uint32_t *addend)
2223 atomic_add_long((volatile u_long *)addend, 1);
/*
 * Atomically decrement *addend.  Same u_long-cast caveat as
 * InterlockedIncrement() above — TODO confirm on LP64.
 */
2228 InterlockedDecrement(volatile uint32_t *addend)
2230 atomic_subtract_long((volatile u_long *)addend, 1);
/*
 * Atomically add a 32-bit increment to a 64-bit statistic by doing the
 * addition under the global interlock (the add itself falls in a gap of
 * this extract).
 */
2235 ExInterlockedAddLargeStatistic(uint64_t *addend, uint32_t inc)
2237 mtx_spinlock(&ntoskrnl_interlock);
2239 mtx_spinunlock(&ntoskrnl_interlock);
/*
 * Allocate and initialize an MDL describing [vaddr, vaddr+len).  Small
 * MDLs come from the mdl objcache zone (and are tagged so IoFreeMdl can
 * return them there); oversize ones are pool-allocated.  When an IRP is
 * supplied, the MDL is linked into it: appended to the existing chain
 * for secondary buffers, or installed as irp_mdl (panicking if one is
 * already present, which would leak).
 */
2243 IoAllocateMdl(void *vaddr, uint32_t len, uint8_t secondarybuf,
2244 uint8_t chargequota, irp *iopkt)
2249 if (MmSizeOfMdl(vaddr, len) > MDL_ZONE_SIZE)
2250 m = ExAllocatePoolWithTag(NonPagedPool,
2251 MmSizeOfMdl(vaddr, len), 0);
2253 m = objcache_get(mdl_cache, M_NOWAIT);
2254 bzero(m, sizeof(mdl));
2261 MmInitializeMdl(m, vaddr, len);
2264 * MmInitializeMdl() clears the flags field, so we
2265 * have to set this here. If the MDL came from the
2266 * MDL UMA zone, tag it so we can release it to
2267 * the right place later.
2270 m->mdl_flags = MDL_ZONE_ALLOCED;
2272 if (iopkt != NULL) {
2273 if (secondarybuf == TRUE) {
2275 last = iopkt->irp_mdl;
2276 while (last->mdl_next != NULL)
2277 last = last->mdl_next;
2280 if (iopkt->irp_mdl != NULL)
2281 panic("leaking an MDL in IoAllocateMdl()");
2295 if (m->mdl_flags & MDL_ZONE_ALLOCED)
2296 objcache_put(mdl_cache, m);
/*
 * Approximate Windows contiguous allocation with a page-rounded pool
 * allocation; the 'highest' physical-address bound is not honored here.
 */
2302 MmAllocateContiguousMemory(uint32_t size, uint64_t highest)
2305 size_t pagelength = roundup(size, PAGE_SIZE);
2307 addr = ExAllocatePoolWithTag(NonPagedPool, pagelength, 0);
2312 #if 0 /* XXX swildner */
/*
 * Disabled variant: maps the Windows caching type onto a vm_memattr_t
 * and uses kmem_alloc_contig() to honor the physical range, boundary
 * and cache-attribute constraints.
 */
2314 MmAllocateContiguousMemorySpecifyCache(uint32_t size, uint64_t lowest,
2315 uint64_t highest, uint64_t boundary, enum nt_caching_type cachetype)
2317 vm_memattr_t memattr;
2320 switch (cachetype) {
2322 memattr = VM_MEMATTR_UNCACHEABLE;
2324 case MmWriteCombined:
2325 memattr = VM_MEMATTR_WRITE_COMBINING;
2327 case MmNonCachedUnordered:
2328 memattr = VM_MEMATTR_UNCACHEABLE;
2331 case MmHardwareCoherentCached:
2334 memattr = VM_MEMATTR_DEFAULT;
2338 ret = (void *)kmem_alloc_contig(kernel_map, size, M_ZERO | M_NOWAIT,
2339 lowest, highest, PAGE_SIZE, boundary, memattr);
2341 malloc_type_allocated(M_DEVBUF, round_page(size));
/*
 * Active fallback: plain page-rounded pool allocation; range/boundary/
 * cache-type constraints are not honored (panics on some path — the
 * guarding condition falls in a gap of this extract).
 */
2346 MmAllocateContiguousMemorySpecifyCache(uint32_t size, uint64_t lowest,
2347 uint64_t highest, uint64_t boundary, enum nt_caching_type cachetype)
2351 size_t pagelength = roundup(size, PAGE_SIZE);
2353 addr = ExAllocatePoolWithTag(NonPagedPool, pagelength, 0);
2357 panic("%s", __func__);
2363 MmFreeContiguousMemory(void *base)
/* Release memory obtained from the contiguous allocator. */
2369 MmFreeContiguousMemorySpecifyCache(void *base, uint32_t size,
2370 enum nt_caching_type cachetype)
2372 contigfree(base, size, M_DEVBUF);
/*
 * Size in bytes of an MDL describing [vaddr, vaddr+len): the header
 * plus one page-pointer slot per spanned page.
 */
2376 MmSizeOfMdl(void *vaddr, size_t len)
2380 l = sizeof(struct mdl) +
2381 (sizeof(vm_offset_t *) * SPAN_PAGES(vaddr, len));
2387 * The Microsoft documentation says this routine fills in the
2388 * page array of an MDL with the _physical_ page addresses that
2389 * comprise the buffer, but we don't really want to do that here.
2390 * Instead, we just fill in the page array with the kernel virtual
2391 * addresses of the buffers.
2394 MmBuildMdlForNonPagedPool(mdl *m)
2396 vm_offset_t *mdl_pages;
2399 pagecnt = SPAN_PAGES(m->mdl_byteoffset, m->mdl_bytecount);
2401 if (pagecnt > (m->mdl_size - sizeof(mdl)) / sizeof(vm_offset_t *))
2402 panic("not enough pages in MDL to describe buffer");
2404 mdl_pages = MmGetMdlPfnArray(m);
/*
 * NOTE(review): mdl_pages is never advanced inside the loop, so every
 * iteration writes the same slot; an increment may fall in an elided
 * line of this extract — confirm against the full source.
 */
2406 for (i = 0; i < pagecnt; i++)
2407 *mdl_pages = (vm_offset_t)m->mdl_startva + (i * PAGE_SIZE);
2409 m->mdl_flags |= MDL_SOURCE_IS_NONPAGED_POOL;
2410 m->mdl_mappedsystemva = MmGetMdlVirtualAddress(m);
/*
 * "Map" an MDL: since buffers are already kernel-virtual here, just tag
 * the MDL as mapped and hand back its starting VA.
 */
2414 MmMapLockedPages(mdl *buf, uint8_t accessmode)
2416 buf->mdl_flags |= MDL_MAPPED_TO_SYSTEM_VA;
2417 return (MmGetMdlVirtualAddress(buf));
/* Cache-type-aware variant; all extra arguments are ignored here. */
2421 MmMapLockedPagesSpecifyCache(mdl *buf, uint8_t accessmode, uint32_t cachetype,
2422 void *vaddr, uint32_t bugcheck, uint32_t prio)
2424 return (MmMapLockedPages(buf, accessmode));
/* Undo MmMapLockedPages(): just clear the mapped flag. */
2428 MmUnmapLockedPages(void *vaddr, mdl *buf)
2430 buf->mdl_flags &= ~MDL_MAPPED_TO_SYSTEM_VA;
2434 * This function has a problem in that it will break if you
2435 * compile this module without PAE and try to use it on a PAE
2436 * kernel. Unfortunately, there's no way around this at the
2437 * moment. It's slightly less broken that using pmap_kextract().
2438 * You'd think the virtual memory subsystem would help us out
2439 * here, but it doesn't.
/* Translate a kernel virtual address to its physical address. */
2443 MmGetPhysicalAddress(void *base)
2445 return (pmap_extract(kernel_map.pmap, (vm_offset_t)base));
/*
 * Resolve a kernel routine by name: convert the Unicode name to ANSI,
 * then look it up in the ntoskrnl function table.  (The ansi_string is
 * presumably freed in an elided line — TODO confirm, else it leaks.)
 */
2449 MmGetSystemRoutineAddress(unicode_string *ustr)
2453 if (RtlUnicodeStringToAnsiString(&astr, ustr, TRUE))
2455 return (ndis_get_routine_address(ntoskrnl_functbl, astr.as_buf));
/* A kernel VA is "valid" if it currently has a physical translation. */
2459 MmIsAddressValid(void *vaddr)
2461 if (pmap_extract(kernel_map.pmap, (vm_offset_t)vaddr))
/*
 * Map a physical I/O range by searching the device tree (from each
 * nexus down, via ntoskrnl_finddev) for an already-mapped memory
 * resource containing 'paddr', then returning the corresponding kernel
 * virtual address offset into that resource.  Returns NULL-ish on no
 * match (the failure return falls in a gap of this extract).
 */
2468 MmMapIoSpace(uint64_t paddr, uint32_t len, uint32_t cachetype)
2470 devclass_t nexus_class;
2471 device_t *nexus_devs, devp;
2472 int nexus_count = 0;
2473 device_t matching_dev = NULL;
2474 struct resource *res;
2478 /* There will always be at least one nexus. */
2480 nexus_class = devclass_find("nexus");
2481 devclass_get_devices(nexus_class, &nexus_devs, &nexus_count);
2483 for (i = 0; i < nexus_count; i++) {
2484 devp = nexus_devs[i];
2485 matching_dev = ntoskrnl_finddev(devp, paddr, &res);
2490 kfree(nexus_devs, M_TEMP);
2492 if (matching_dev == NULL)
/* Offset into the resource's KVA by paddr's distance from its start. */
2495 v = (vm_offset_t)rman_get_virtual(res);
2496 if (paddr > rman_get_start(res))
2497 v += paddr - rman_get_start(res);
2503 MmUnmapIoSpace(void *vaddr, size_t len)
/*
 * Recursively search the device tree under 'dev' for a SYS_RES_MEMORY
 * resource whose physical range contains 'paddr'.  Activates the
 * resource if needed, stores it through 'res', and returns the owning
 * device; returns NULL when no device in this subtree matches.
 */
2509 ntoskrnl_finddev(device_t dev, uint64_t paddr, struct resource **res)
2511 device_t *children = NULL;
2512 device_t matching_dev;
2515 struct resource_list *rl;
2516 struct resource_list_entry *rle;
2520 /* We only want devices that have been successfully probed. */
2522 if (device_is_alive(dev) == FALSE)
2525 rl = BUS_GET_RESOURCE_LIST(device_get_parent(dev), dev);
2527 SLIST_FOREACH(rle, rl, link) {
2533 flags = rman_get_flags(r);
2535 if (rle->type == SYS_RES_MEMORY &&
2536 paddr >= rman_get_start(r) &&
2537 paddr <= rman_get_end(r)) {
2538 if (!(flags & RF_ACTIVE))
2539 bus_activate_resource(dev,
2540 SYS_RES_MEMORY, 0, r);
2548 * If this device has children, do another
2549 * level of recursion to inspect them.
2552 device_get_children(dev, &children, &childcnt);
2554 for (i = 0; i < childcnt; i++) {
2555 matching_dev = ntoskrnl_finddev(children[i], paddr, res);
2556 if (matching_dev != NULL) {
2557 kfree(children, M_TEMP);
2558 return (matching_dev);
2563 /* Won't somebody please think of the children! */
2565 if (children != NULL)
2566 kfree(children, M_TEMP);
2572 * Workitems are unlike DPCs, in that they run in a user-mode thread
2573 * context rather than at DISPATCH_LEVEL in kernel context. In our
2574 * case we run them in kernel context anyway.
/*
 * Per-queue worker thread: initialize the queue's dispatch list, lock
 * and wakeup event, then loop forever waiting for the event, draining
 * queued io_workitems one at a time.  The queue spinlock is dropped
 * around each callback invocation and re-taken afterwards; a NULL
 * iw_func entry appears to act as a skip/exit marker (its handling
 * falls in a gap of this extract — TODO confirm).
 */
2577 ntoskrnl_workitem_thread(void *arg)
2586 InitializeListHead(&kq->kq_disp);
2587 kq->kq_td = curthread;
2589 KeInitializeSpinLock(&kq->kq_lock);
2590 KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE);
2593 KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL);
2595 KeAcquireSpinLock(&kq->kq_lock, &irql);
2599 KeReleaseSpinLock(&kq->kq_lock, irql);
2603 while (!IsListEmpty(&kq->kq_disp)) {
2604 l = RemoveHeadList(&kq->kq_disp);
2605 iw = CONTAINING_RECORD(l,
2606 io_workitem, iw_listentry);
2607 InitializeListHead((&iw->iw_listentry));
2608 if (iw->iw_func == NULL)
/* Run the callback without holding the queue lock. */
2610 KeReleaseSpinLock(&kq->kq_lock, irql);
2611 MSCALL2(iw->iw_func, iw->iw_dobj, iw->iw_ctx);
2612 KeAcquireSpinLock(&kq->kq_lock, &irql);
2615 KeReleaseSpinLock(&kq->kq_lock, irql);
2620 return; /* notreached */
/*
 * Parse an ASCII number into *val.  Skips leading whitespace/control
 * characters, accepts an optional sign, honors 0b/0o/0x prefixes when
 * base is 0, and otherwise requires base 2, 8, 10 or 16.  Returns
 * STATUS_ACCESS_VIOLATION for a NULL destination, and
 * STATUS_INVALID_PARAMETER for a bad base or a digit out of range.
 */
2624 RtlCharToInteger(const char *src, uint32_t base, uint32_t *val)
2630 return (STATUS_ACCESS_VIOLATION);
2631 while (*src != '\0' && *src <= ' ')
2635 else if (*src == '-') {
2646 } else if (*src == 'o') {
2649 } else if (*src == 'x') {
2654 } else if (!(base == 2 || base == 8 || base == 10 || base == 16))
2655 return (STATUS_INVALID_PARAMETER);
2657 for (res = 0; *src; src++) {
2661 else if (isxdigit(*src))
2662 v = tolower(*src) - 'a' + 10;
2666 return (STATUS_INVALID_PARAMETER);
2667 res = res * base + v;
2669 *val = negative ? -res : res;
2670 return (STATUS_SUCCESS);
/*
 * Shut down every workitem queue: wake each worker (a NULL/exit marker
 * is presumably queued in an elided line — TODO confirm) and sleep in
 * short intervals until the thread has gone away.
 */
2674 ntoskrnl_destroy_workitem_threads(void)
2679 for (i = 0; i < WORKITEM_THREADS; i++) {
2682 KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE);
2684 tsleep(kq->kq_td, 0, "waitiw", hz/10);
/*
 * Allocate an io_workitem from the objcache and assign it a worker
 * queue in round-robin fashion (wq_idx advanced under the dispatcher
 * lock).  Returns NULL on allocation failure (check elided from this
 * extract).
 */
2689 IoAllocateWorkItem(device_object *dobj)
2693 iw = objcache_get(iw_cache, M_NOWAIT);
2697 InitializeListHead(&iw->iw_listentry);
2700 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
2701 iw->iw_idx = wq_idx;
2702 WORKIDX_INC(wq_idx);
2703 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/* Return an io_workitem to its objcache. */
2709 IoFreeWorkItem(io_workitem *iw)
2711 objcache_put(iw_cache, iw);
/*
 * Queue a workitem on its assigned worker queue and wake the worker.
 * Scans the queue first and silently returns if the same workitem is
 * already pending — double-queuing would corrupt the list.
 */
2715 IoQueueWorkItem(io_workitem *iw, io_workitem_func iw_func, uint32_t qtype,
2723 kq = wq_queues + iw->iw_idx;
2725 KeAcquireSpinLock(&kq->kq_lock, &irql);
2728 * Traverse the list and make sure this workitem hasn't
2729 * already been inserted. Queuing the same workitem
2730 * twice will hose the list but good.
2733 l = kq->kq_disp.nle_flink;
2734 while (l != &kq->kq_disp) {
2735 cur = CONTAINING_RECORD(l, io_workitem, iw_listentry);
2737 /* Already queued -- do nothing. */
2738 KeReleaseSpinLock(&kq->kq_lock, irql);
2744 iw->iw_func = iw_func;
2747 InsertTailList((&kq->kq_disp), (&iw->iw_listentry));
2748 KeReleaseSpinLock(&kq->kq_lock, irql);
2750 KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE);
/*
 * Trampoline for legacy ExQueueWorkItem() jobs: 'dobj' is really the
 * caller's work_queue_item.  Free the bridging io_workitem, then invoke
 * the legacy callback with its own context.
 */
2754 ntoskrnl_workitem(device_object *dobj, void *arg)
2761 w = (work_queue_item *)dobj;
2762 f = (work_item_func)w->wqi_func;
2763 objcache_put(iw_cache, iw);
2764 MSCALL2(f, w, w->wqi_ctx);
2768 * The ExQueueWorkItem() API is deprecated in Windows XP. Microsoft
2769 * warns that it's unsafe and to use IoQueueWorkItem() instead. The
2770 * problem with ExQueueWorkItem() is that it can't guard against
2771 * the condition where a driver submits a job to the work queue and
2772 * is then unloaded before the job is able to run. IoQueueWorkItem()
2773 * acquires a reference to the device's device_object via the
2774 * object manager and retains it until after the job has completed,
2775 * which prevents the driver from being unloaded before the job
2776 * runs. (We don't currently support this behavior, though hopefully
2777 * that will change once the object manager API is fleshed out a bit.)
2779 * Having said all that, the ExQueueWorkItem() API remains, because
2780 * there are still other parts of Windows that use it, including
2781 * NDIS itself: NdisScheduleWorkItem() calls ExQueueWorkItem().
2782 * We fake up the ExQueueWorkItem() API on top of our implementation
2783 * of IoQueueWorkItem(). Workitem thread #3 is reserved exclusively
2784 * for ExQueueWorkItem() jobs, and we pass a pointer to the work
2785 * queue item (provided by the caller) in to IoAllocateWorkItem()
2786 * instead of the device_object. We need to save this pointer so
2787 * we can apply a sanity check: as with the DPC queue and other
2788 * workitem queues, we can't allow the same work queue item to
2789 * be queued twice. If it's already pending, we silently return
/*
 * Legacy work-queue API, layered on IoQueueWorkItem() (see the block
 * comment above).  All legacy jobs go to the dedicated
 * WORKITEM_LEGACY_THREAD queue; the caller's work_queue_item pointer is
 * smuggled through the io_workitem's device-object slot, which is also
 * how duplicate queuing is detected (silently ignored).
 */
2793 ExQueueWorkItem(work_queue_item *w, uint32_t qtype)
2796 io_workitem_func iwf;
2804 * We need to do a special sanity test to make sure
2805 * the ExQueueWorkItem() API isn't used to queue
2806 * the same workitem twice. Rather than checking the
2807 * io_workitem pointer itself, we test the attached
2808 * device object, which is really a pointer to the
2809 * legacy work queue item structure.
2812 kq = wq_queues + WORKITEM_LEGACY_THREAD;
2813 KeAcquireSpinLock(&kq->kq_lock, &irql);
2814 l = kq->kq_disp.nle_flink;
2815 while (l != &kq->kq_disp) {
2816 cur = CONTAINING_RECORD(l, io_workitem, iw_listentry);
2817 if (cur->iw_dobj == (device_object *)w) {
2818 /* Already queued -- do nothing. */
2819 KeReleaseSpinLock(&kq->kq_lock, irql);
2824 KeReleaseSpinLock(&kq->kq_lock, irql);
2826 iw = IoAllocateWorkItem((device_object *)w);
2830 iw->iw_idx = WORKITEM_LEGACY_THREAD;
2831 iwf = (io_workitem_func)ntoskrnl_findwrap((funcptr)ntoskrnl_workitem);
2832 IoQueueWorkItem(iw, iwf, qtype, iw);
2836 RtlZeroMemory(void *dst, size_t len)
/*
 * Zero a buffer.  NOTE(review): plain memset() may be optimized away
 * when the buffer is dead afterwards, which defeats the "secure" intent
 * of the Windows API — a volatile or explicit-bzero approach would be
 * stronger.
 */
2842 RtlSecureZeroMemory(void *dst, size_t len)
2844 memset(dst, 0, len);
/* Fill a buffer with byte 'c'. */
2848 RtlFillMemory(void *dst, size_t len, uint8_t c)
2850 memset(dst, c, len);
/* Overlap-safe copy of len bytes from src to dst. */
2854 RtlMoveMemory(void *dst, const void *src, size_t len)
2856 memmove(dst, src, len);
/* Copy len bytes from src to dst (regions must not overlap). */
2860 RtlCopyMemory(void *dst, const void *src, size_t len)
2862 bcopy(src, dst, len);
/*
 * Return the length of the common byte prefix of s1 and s2, up to
 * 'len' bytes (the return of 'i' falls in a gap of this extract).
 */
2866 RtlCompareMemory(const void *s1, const void *s2, size_t len)
2871 m1 = __DECONST(char *, s1);
2872 m2 = __DECONST(char *, s2);
2874 for (i = 0; i < len && m1[i] == m2[i]; i++);
/*
 * Point an ansi_string at an existing NUL-terminated C string (no
 * copy); lengths are zero for a NULL source.
 */
2879 RtlInitAnsiString(ansi_string *dst, char *src)
2887 a->as_len = a->as_maxlen = 0;
2891 a->as_len = a->as_maxlen = strlen(src);
/*
 * Point a unicode_string at an existing NUL-terminated UTF-16 buffer
 * (no copy); lengths are in bytes (character count * 2), zero for a
 * NULL source.
 */
2896 RtlInitUnicodeString(unicode_string *dst, uint16_t *src)
2905 u->us_len = u->us_maxlen = 0;
2912 u->us_len = u->us_maxlen = i * 2;
/*
 * Convert a Unicode numeric string to an integer: narrow each UTF-16
 * code unit to its low byte into a local ASCII buffer, honoring an
 * optional sign and 0b/0o/0x base prefix, then delegate to strtoul().
 * (Sign application to *val is not visible in this extract — TODO
 * confirm the negative case against the full source.)
 */
2917 RtlUnicodeStringToInteger(unicode_string *ustr, uint32_t base, uint32_t *val)
2924 uchr = ustr->us_buf;
2926 bzero(abuf, sizeof(abuf));
2928 if ((char)((*uchr) & 0xFF) == '-') {
2932 } else if ((char)((*uchr) & 0xFF) == '+') {
2939 if ((char)((*uchr) & 0xFF) == 'b') {
2943 } else if ((char)((*uchr) & 0xFF) == 'o') {
2947 } else if ((char)((*uchr) & 0xFF) == 'x') {
2961 ntoskrnl_unicode_to_ascii(uchr, astr, len);
2962 *val = strtoul(abuf, NULL, base);
2964 return (STATUS_SUCCESS);
2968 RtlFreeUnicodeString(unicode_string *ustr)
2970 if (ustr->us_buf == NULL)
2972 ExFreePool(ustr->us_buf);
2973 ustr->us_buf = NULL;
2977 RtlFreeAnsiString(ansi_string *astr)
2979 if (astr->as_buf == NULL)
2981 ExFreePool(astr->as_buf);
2982 astr->as_buf = NULL;
/*
 * Minimal libc shims exported to Windows drivers.  atoi/atol are
 * implemented on top of strtol (no error reporting, matching libc).
 */
2986 atoi(const char *str)
2988 return (int)strtol(str, NULL, 10);
2992 atol(const char *str)
2994 return strtol(str, NULL, 10);
/*
 * rand(): visible code seeds the kernel PRNG from the current
 * microsecond counter and returns krandom(); the gettimeofday-style
 * setup lines are elided in this excerpt.
 */
3003 skrandom(tv.tv_usec);
3004 return ((int)krandom());
/* srand(): body elided here -- presumably seeds skrandom(); verify. */
3008 srand(unsigned int seed)
/*
 * IoIsWdmVersionAvailable: claim WDM support only for the exact
 * version pair (WDM_MAJOR, WDM_MINOR_WINXP); the TRUE/FALSE returns
 * are elided in this excerpt.
 */
3014 IoIsWdmVersionAvailable(uint8_t major, uint8_t minor)
3016 if (major == WDM_MAJOR && minor == WDM_MINOR_WINXP)
/* Registry access is not emulated: always fail. */
3022 IoOpenDeviceRegistryKey(struct device_object *devobj, uint32_t type,
3023 uint32_t mask, void **key)
3025 return (NDIS_STATUS_INVALID_DEVICE_REQUEST);
/* Stub: reports success without producing a device/file object. */
3029 IoGetDeviceObjectPointer(unicode_string *name, uint32_t reqaccess,
3030 void *fileobj, device_object *devobj)
3032 return (STATUS_SUCCESS);
/*
 * IoGetDeviceProperty: only DEVPROP_DRIVER_KEYNAME is supported; it
 * hands back the driver-object's registered name/length.  Any other
 * property id yields STATUS_INVALID_PARAMETER_2.
 */
3036 IoGetDeviceProperty(device_object *devobj, uint32_t regprop, uint32_t buflen,
3037 void *prop, uint32_t *reslen)
3042 drv = devobj->do_drvobj;
3045 case DEVPROP_DRIVER_KEYNAME:
3047 *name = drv->dro_drivername.us_buf;
3048 *reslen = drv->dro_drivername.us_len;
3051 return (STATUS_INVALID_PARAMETER_2);
3055 return (STATUS_SUCCESS);
/*
 * KeInitializeMutex: set up a kmutant as signalled (sigstate == 1
 * means "free"), with an empty wait list and no owner.
 */
3059 KeInitializeMutex(kmutant *kmutex, uint32_t level)
3061 InitializeListHead((&kmutex->km_header.dh_waitlisthead));
3062 kmutex->km_abandoned = FALSE;
3063 kmutex->km_apcdisable = 1;
3064 kmutex->km_header.dh_sigstate = 1;
3065 kmutex->km_header.dh_type = DISP_TYPE_MUTANT;
/* dh_size is expressed in 32-bit words, per NT convention. */
3066 kmutex->km_header.dh_size = sizeof(kmutant) / sizeof(uint32_t);
3067 kmutex->km_ownerthread = NULL;
/*
 * KeReleaseMutex: release under the global dispatcher lock.  Only the
 * owning thread may release; when the count returns to the signalled
 * state (1) the owner is cleared and waiters are woken via
 * ntoskrnl_waittest().  Returns the previous signal state (elided
 * final return in this excerpt -- presumably prevstate).
 */
3071 KeReleaseMutex(kmutant *kmutex, uint8_t kwait)
3075 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3076 prevstate = kmutex->km_header.dh_sigstate;
3077 if (kmutex->km_ownerthread != curthread) {
3078 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
3079 return (STATUS_MUTANT_NOT_OWNED);
3082 kmutex->km_header.dh_sigstate++;
3083 kmutex->km_abandoned = FALSE;
3085 if (kmutex->km_header.dh_sigstate == 1) {
3086 kmutex->km_ownerthread = NULL;
3087 ntoskrnl_waittest(&kmutex->km_header, IO_NO_INCREMENT);
3090 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/* Unlocked read of the current signal state. */
3096 KeReadStateMutex(kmutant *kmutex)
3098 return (kmutex->km_header.dh_sigstate);
/*
 * KeInitializeEvent: initialize the dispatch header with the caller's
 * initial state.  Notification events wake all waiters and stay
 * signalled; synchronization events wake one waiter and auto-clear.
 */
3102 KeInitializeEvent(nt_kevent *kevent, uint32_t type, uint8_t state)
3104 InitializeListHead((&kevent->k_header.dh_waitlisthead));
3105 kevent->k_header.dh_sigstate = state;
3106 if (type == EVENT_TYPE_NOTIFY)
3107 kevent->k_header.dh_type = DISP_TYPE_NOTIFICATION_EVENT;
3109 kevent->k_header.dh_type = DISP_TYPE_SYNCHRONIZATION_EVENT;
3110 kevent->k_header.dh_size = sizeof(nt_kevent) / sizeof(uint32_t);
/*
 * KeResetEvent: clear the event under the dispatcher lock and return
 * the previous state (final return elided in this excerpt).
 */
3114 KeResetEvent(nt_kevent *kevent)
3118 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3119 prevstate = kevent->k_header.dh_sigstate;
3120 kevent->k_header.dh_sigstate = FALSE;
3121 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/*
 * KeSetEvent: signal the event and satisfy waits.  All work happens
 * under the global dispatcher lock.
 */
3127 KeSetEvent(nt_kevent *kevent, uint32_t increment, uint8_t kwait)
3131 nt_dispatch_header *dh;
3134 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3135 prevstate = kevent->k_header.dh_sigstate;
3136 dh = &kevent->k_header;
3138 if (IsListEmpty(&dh->dh_waitlisthead))
3140 * If there's nobody in the waitlist, just set
3141 * the state to signalled.
3143 dh->dh_sigstate = 1;
3146 * Get the first waiter. If this is a synchronization
3147 * event, just wake up that one thread (don't bother
3148 * setting the state to signalled since we're supposed
3149 * to automatically clear synchronization events anyway).
3151 * If it's a notification event, or the first
3152 * waiter is doing a WAITTYPE_ALL wait, go through
3153 * the full wait satisfaction process.
3155 w = CONTAINING_RECORD(dh->dh_waitlisthead.nle_flink,
3156 wait_block, wb_waitlist);
3158 if (kevent->k_header.dh_type == DISP_TYPE_NOTIFICATION_EVENT ||
3159 w->wb_waittype == WAITTYPE_ALL) {
3160 if (prevstate == 0) {
3161 dh->dh_sigstate = 1;
3162 ntoskrnl_waittest(dh, increment);
/*
 * Synchronization-event fast path: wake just the first waiter,
 * boosting its wakeup priority by the caller's increment (scaled
 * by 4) but never above PRI_MIN_KERN.
 */
3165 w->wb_awakened |= TRUE;
3166 cv_broadcastpri(&we->we_cv,
3167 (w->wb_oldpri - (increment * 4)) > PRI_MIN_KERN ?
3168 w->wb_oldpri - (increment * 4) : PRI_MIN_KERN);
3172 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/* KeClearEvent: unsynchronized clear, per the NT API contract. */
3178 KeClearEvent(nt_kevent *kevent)
3180 kevent->k_header.dh_sigstate = FALSE;
/* Unlocked read of the current signal state. */
3184 KeReadStateEvent(nt_kevent *kevent)
3186 return (kevent->k_header.dh_sigstate);
3190 * The object manager in Windows is responsible for managing
3191 * references and access to various types of objects, including
3192 * device_objects, events, threads, timers and so on. However,
3193 * there's a difference in the way objects are handled in user
3194 * mode versus kernel mode.
3196 * In user mode (i.e. Win32 applications), all objects are
3197 * managed by the object manager. For example, when you create
3198 * a timer or event object, you actually end up with an
3199 * object_header (for the object manager's bookkeeping
3200 * purposes) and an object body (which contains the actual object
3201 * structure, e.g. ktimer, kevent, etc...). This allows Windows
3202 * to manage resource quotas and to enforce access restrictions
3203 * on basically every kind of system object handled by the kernel.
3205 * However, in kernel mode, you only end up using the object
3206 * manager some of the time. For example, in a driver, you create
3207 * a timer object by simply allocating the memory for a ktimer
3208 * structure and initializing it with KeInitializeTimer(). Hence,
3209 * the timer has no object_header and no reference counting or
3210 * security/resource checks are done on it. The assumption in
3211 * this case is that if you're running in kernel mode, you know
3212 * what you're doing, and you're already at an elevated privilege
3215 * There are some exceptions to this. The two most important ones
3216 * for our purposes are device_objects and threads. We need to use
3217 * the object manager to do reference counting on device_objects,
3218 * and for threads, you can only get a pointer to a thread's
3219 * dispatch header by using ObReferenceObjectByHandle() on the
3220 * handle returned by PsCreateSystemThread().
/*
 * ObReferenceObjectByHandle: fake object-manager reference.  Allocates
 * an nt_objref that records the handle and carries a thread-style
 * dispatch header, and links it on ntoskrnl_reflist so
 * PsTerminateSystemThread() can later signal it.  The *object
 * out-assignment is elided in this excerpt -- presumably set to nr.
 */
3224 ObReferenceObjectByHandle(ndis_handle handle, uint32_t reqaccess, void *otype,
3225 uint8_t accessmode, void **object, void **handleinfo)
3229 nr = kmalloc(sizeof(nt_objref), M_DEVBUF, M_NOWAIT|M_ZERO);
3231 return (STATUS_INSUFFICIENT_RESOURCES);
3233 InitializeListHead((&nr->no_dh.dh_waitlisthead));
3234 nr->no_obj = handle;
3235 nr->no_dh.dh_type = DISP_TYPE_THREAD;
3236 nr->no_dh.dh_sigstate = 0;
3237 nr->no_dh.dh_size = (uint8_t)(sizeof(struct thread) /
3239 TAILQ_INSERT_TAIL(&ntoskrnl_reflist, nr, link);
3242 return (STATUS_SUCCESS);
/* Drop a reference: unlink the tracking record and free it. */
3246 ObfDereferenceObject(void *object)
3251 TAILQ_REMOVE(&ntoskrnl_reflist, nr, link);
3252 kfree(nr, M_DEVBUF);
/* Handles are not real kernel objects here, so close is a no-op. */
3256 ZwClose(ndis_handle handle)
3258 return (STATUS_SUCCESS);
/* WMI tracing is unsupported: query fails, trace/registration no-op. */
3262 WmiQueryTraceInformation(uint32_t traceclass, void *traceinfo,
3263 uint32_t infolen, uint32_t reqlen, void *buf)
3265 return (STATUS_NOT_FOUND);
3269 WmiTraceMessage(uint64_t loghandle, uint32_t messageflags,
3270 void *guid, uint16_t messagenum, ...)
3272 return (STATUS_SUCCESS);
3276 IoWMIRegistrationControl(device_object *dobj, uint32_t action)
3278 return (STATUS_SUCCESS);
3282 * This is here just in case the thread returns without calling
3283 * PsTerminateSystemThread().
/*
 * ntoskrnl_thrfunc: native-side trampoline for PsCreateSystemThread.
 * Unpacks the heap-allocated thread_context, frees it, invokes the
 * driver's entry point via the MSCALL1 calling-convention shim, and
 * then terminates through PsTerminateSystemThread() so waiters on the
 * thread object are signalled even if the entry point returns.
 */
3286 ntoskrnl_thrfunc(void *arg)
3288 thread_context *thrctx;
3289 uint32_t (*tfunc)(void *);
3294 tfunc = thrctx->tc_thrfunc;
3295 tctx = thrctx->tc_thrctx;
/* Context was kmalloc'd by PsCreateSystemThread; free before running. */
3296 kfree(thrctx, M_TEMP);
3298 rval = MSCALL1(tfunc, tctx);
3300 PsTerminateSystemThread(rval);
3301 return; /* notreached */
/*
 * PsCreateSystemThread: spawn a kernel thread running the driver's
 * function.  Most Windows parameters (objattrs, phandle, clientid)
 * are ignored.  On kthread_create() failure the context is presumably
 * freed on the elided error path -- verify against full source.
 */
3305 PsCreateSystemThread(ndis_handle *handle, uint32_t reqaccess, void *objattrs,
3306 ndis_handle phandle, void *clientid, void *thrfunc, void *thrctx)
3312 tc = kmalloc(sizeof(thread_context), M_TEMP, M_NOWAIT);
3314 return (STATUS_INSUFFICIENT_RESOURCES);
3316 tc->tc_thrctx = thrctx;
3317 tc->tc_thrfunc = thrfunc;
3319 error = kthread_create(ntoskrnl_thrfunc, tc, &p, "Win kthread %d",
3324 return (STATUS_INSUFFICIENT_RESOURCES);
3330 return (STATUS_SUCCESS);
3334 * In Windows, the exit of a thread is an event that you're allowed
3335 * to wait on, assuming you've obtained a reference to the thread using
3336 * ObReferenceObjectByHandle(). Unfortunately, the only way we can
3337 * simulate this behavior is to register each thread we create in a
3338 * reference list, and if someone holds a reference to us, we poke
/*
 * PsTerminateSystemThread: before exiting, scan the reference list
 * for an nt_objref matching this thread's process and signal its
 * dispatch header so ObReferenceObjectByHandle() waiters wake up.
 * The actual thread exit call falls between the lock release and the
 * final return (elided in this excerpt).
 */
3342 PsTerminateSystemThread(ndis_status status)
3344 struct nt_objref *nr;
3346 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3347 TAILQ_FOREACH(nr, &ntoskrnl_reflist, link) {
3348 if (nr->no_obj != curthread->td_proc)
3350 nr->no_dh.dh_sigstate = 1;
3351 ntoskrnl_waittest(&nr->no_dh, IO_NO_INCREMENT);
3354 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
3360 return (0); /* notreached */
/*
 * DbgPrint: forward the driver's printf-style message to the kernel
 * log (kvprintf call elided here) and report success.
 */
3364 DbgPrint(char *fmt, ...)
3369 __va_start(ap, fmt);
3374 return (STATUS_SUCCESS);
/* DbgBreakPoint: drop into the in-kernel debugger. */
3381 Debugger("DbgBreakPoint(): breakpoint");
/* KeBugCheckEx: a Windows STOP error becomes a host panic. */
3385 KeBugCheckEx(uint32_t code, u_long param1, u_long param2, u_long param3,
3388 panic("KeBugCheckEx: STOP 0x%X", code);
/*
 * ntoskrnl_timercall: callout handler behind KeSetTimer(Ex).  Runs
 * with the dispatcher lock held: returns the callout slot, signals
 * the timer's dispatch header, re-arms periodic timers, and finally
 * queues the associated DPC (if any) outside the lock.
 */
3392 ntoskrnl_timercall(void *arg)
3398 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3402 #ifdef NTOSKRNL_DEBUG_TIMERS
3403 ntoskrnl_timer_fires++;
/* Return the callout entry to the free pool first. */
3405 ntoskrnl_remove_timer(timer);
3408 * This should never happen, but complain
/* Late fire after cancellation: warn and bail out. */
3412 if (timer->k_header.dh_inserted == FALSE) {
3413 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
3414 kprintf("NTOS: timer %p fired even though "
3415 "it was canceled\n", timer);
3419 /* Mark the timer as no longer being on the timer queue. */
3421 timer->k_header.dh_inserted = FALSE;
3423 /* Now signal the object and satisfy any waits on it. */
3425 timer->k_header.dh_sigstate = 1;
3426 ntoskrnl_waittest(&timer->k_header, IO_NO_INCREMENT);
3429 * If this is a periodic timer, re-arm it
3430 * so it will fire again. We do this before
3431 * calling any deferred procedure calls because
3432 * it's possible the DPC might cancel the timer,
3433 * in which case it would be wrong for us to
3434 * re-arm it again afterwards.
/* k_period is in milliseconds; convert to a timeval for the callout. */
3437 if (timer->k_period) {
3439 tv.tv_usec = timer->k_period * 1000;
3440 timer->k_header.dh_inserted = TRUE;
3441 ntoskrnl_insert_timer(timer, tvtohz_high(&tv));
3442 #ifdef NTOSKRNL_DEBUG_TIMERS
3443 ntoskrnl_timer_reloads++;
3449 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
3451 /* If there's a DPC associated with the timer, queue it up. */
3454 KeInsertQueueDpc(dpc, NULL, NULL);
3457 #ifdef NTOSKRNL_DEBUG_TIMERS
3459 sysctl_show_timers(SYSCTL_HANDLER_ARGS)
3464 ntoskrnl_show_timers();
3465 return (sysctl_handle_int(oidp, &ret, 0, req));
3469 ntoskrnl_show_timers(void)
3474 mtx_spinlock(&ntoskrnl_calllock);
3475 l = ntoskrnl_calllist.nle_flink;
3476 while(l != &ntoskrnl_calllist) {
3480 mtx_spinunlock(&ntoskrnl_calllock);
3483 kprintf("%d timers available (out of %d)\n", i, NTOSKRNL_TIMEOUTS);
3484 kprintf("timer sets: %qu\n", ntoskrnl_timer_sets);
3485 kprintf("timer reloads: %qu\n", ntoskrnl_timer_reloads);
3486 kprintf("timer cancels: %qu\n", ntoskrnl_timer_cancels);
3487 kprintf("timer fires: %qu\n", ntoskrnl_timer_fires);
3493 * Must be called with dispatcher lock held.
/*
 * ntoskrnl_insert_timer: take a callout_entry from the preallocated
 * free list (panic if exhausted), attach it to the ktimer, and start
 * the callout with ntoskrnl_timercall as the handler.
 */
3497 ntoskrnl_insert_timer(ktimer *timer, int ticks)
3504 * Try and allocate a timer.
3506 mtx_spinlock(&ntoskrnl_calllock);
3507 if (IsListEmpty(&ntoskrnl_calllist)) {
3508 mtx_spinunlock(&ntoskrnl_calllock);
3509 #ifdef NTOSKRNL_DEBUG_TIMERS
3510 ntoskrnl_show_timers();
/* Pool exhaustion is fatal: no way to report failure to the caller. */
3512 panic("out of timers!");
3514 l = RemoveHeadList(&ntoskrnl_calllist);
3515 mtx_spinunlock(&ntoskrnl_calllock);
3517 e = CONTAINING_RECORD(l, callout_entry, ce_list);
3520 timer->k_callout = c;
3523 callout_reset(c, ticks, ntoskrnl_timercall, timer);
/*
 * ntoskrnl_remove_timer: stop the callout and return its entry to the
 * free list.
 */
3527 ntoskrnl_remove_timer(ktimer *timer)
3531 e = (callout_entry *)timer->k_callout;
3532 callout_stop(timer->k_callout);
3534 mtx_spinlock(&ntoskrnl_calllock);
3535 InsertHeadList((&ntoskrnl_calllist), (&e->ce_list));
3536 mtx_spinunlock(&ntoskrnl_calllock);
/* KeInitializeTimer: a plain timer is a notification-type timer. */
3540 KeInitializeTimer(ktimer *timer)
3545 KeInitializeTimerEx(timer, EVENT_TYPE_NOTIFY);
/*
 * KeInitializeTimerEx: zero the whole structure and set up the
 * dispatch header with the requested type (notification timers stay
 * signalled; synchronization timers auto-clear).
 */
3549 KeInitializeTimerEx(ktimer *timer, uint32_t type)
3554 bzero((char *)timer, sizeof(ktimer));
3555 InitializeListHead((&timer->k_header.dh_waitlisthead));
3556 timer->k_header.dh_sigstate = FALSE;
3557 timer->k_header.dh_inserted = FALSE;
3558 if (type == EVENT_TYPE_NOTIFY)
3559 timer->k_header.dh_type = DISP_TYPE_NOTIFICATION_TIMER;
3561 timer->k_header.dh_type = DISP_TYPE_SYNCHRONIZATION_TIMER;
3562 timer->k_header.dh_size = sizeof(ktimer) / sizeof(uint32_t);
3566 * DPC subsystem. A Windows Defered Procedure Call has the following
3568 * - It runs at DISPATCH_LEVEL.
3569 * - It can have one of 3 importance values that control when it
3570 * runs relative to other DPCs in the queue.
3571 * - On SMP systems, it can be set to run on a specific processor.
3572 * In order to satisfy the last property, we create a DPC thread for
3573 * each CPU in the system and bind it to that CPU. Each thread
3574 * maintains three queues with different importance levels, which
3575 * will be processed in order from lowest to highest.
3577 * In Windows, interrupt handlers run as DPCs. (Not to be confused
3578 * with ISRs, which run in interrupt context and can preempt DPCs.)
3579 * ISRs are given the highest importance so that they'll take
3580 * precedence over timers and other things.
/*
 * ntoskrnl_dpc_thread: per-queue worker that emulates Windows DPC
 * delivery.  Initializes its kdpc_queue, raises its own priority,
 * then loops: wait on kq_proc, drain the dispatch list under the
 * queue spinlock (dropping to DPC level around each callout so the
 * DPC function runs unlocked), and signal kq_done when drained.
 * The thread-exit path at the bottom is elided in this excerpt.
 */
3584 ntoskrnl_dpc_thread(void *arg)
3593 InitializeListHead(&kq->kq_disp);
3594 kq->kq_td = curthread;
3596 kq->kq_running = FALSE;
3597 KeInitializeSpinLock(&kq->kq_lock);
/* Both events are synchronization type: one wakeup per set. */
3598 KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE);
3599 KeInitializeEvent(&kq->kq_done, EVENT_TYPE_SYNC, FALSE);
3602 * Elevate our priority. DPCs are used to run interrupt
3603 * handlers, and they should trigger as soon as possible
3604 * once scheduled by an ISR.
3607 #ifdef NTOSKRNL_MULTIPLE_DPCS
/* Multi-queue build pins each worker to its CPU. */
3608 sched_bind(curthread, kq->kq_cpu);
3610 lwkt_setpri_self(TDPRI_INT_HIGH);
3613 KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL);
3615 KeAcquireSpinLock(&kq->kq_lock, &irql);
/* kq_exit check presumably lives in the elided lines here. */
3619 KeReleaseSpinLock(&kq->kq_lock, irql);
3623 kq->kq_running = TRUE;
3625 while (!IsListEmpty(&kq->kq_disp)) {
3626 l = RemoveHeadList((&kq->kq_disp));
3627 d = CONTAINING_RECORD(l, kdpc, k_dpclistentry);
/* Re-init the entry so KeRemoveQueueDpc() sees "not queued". */
3628 InitializeListHead((&d->k_dpclistentry));
3629 KeReleaseSpinLockFromDpcLevel(&kq->kq_lock);
3630 MSCALL4(d->k_deferedfunc, d, d->k_deferredctx,
3631 d->k_sysarg1, d->k_sysarg2);
3632 KeAcquireSpinLockAtDpcLevel(&kq->kq_lock);
3635 kq->kq_running = FALSE;
3637 KeReleaseSpinLock(&kq->kq_lock, irql);
3639 KeSetEvent(&kq->kq_done, IO_NO_INCREMENT, FALSE);
3644 return; /* notreached */
/*
 * ntoskrnl_destroy_dpc_threads: post a NULL-function DPC to each queue
 * (one per CPU when NTOSKRNL_MULTIPLE_DPCS, otherwise just one) as a
 * shutdown sentinel, then sleep until the worker exits.
 */
3648 ntoskrnl_destroy_dpc_threads(void)
3655 #ifdef NTOSKRNL_MULTIPLE_DPCS
3656 for (i = 0; i < ncpus; i++) {
3658 for (i = 0; i < 1; i++) {
3663 KeInitializeDpc(&dpc, NULL, NULL);
3664 KeSetTargetProcessorDpc(&dpc, i);
3665 KeInsertQueueDpc(&dpc, NULL, NULL);
3667 tsleep(kq->kq_td, 0, "dpcw", hz/10);
/*
 * ntoskrnl_insert_dpc: place a DPC on a queue by importance --
 * LOW appends, otherwise prepends.  The visible loop head scans the
 * list first (duplicate check, presumably; the comparison lines are
 * elided).  Returns a status consumed by KeInsertQueueDpc().
 */
3672 ntoskrnl_insert_dpc(list_entry *head, kdpc *dpc)
3677 l = head->nle_flink;
3679 d = CONTAINING_RECORD(l, kdpc, k_dpclistentry);
3685 if (dpc->k_importance == KDPC_IMPORTANCE_LOW)
3686 InsertTailList((head), (&dpc->k_dpclistentry));
3688 InsertHeadList((head), (&dpc->k_dpclistentry));
/*
 * KeInitializeDpc: record the deferred function/context and default
 * to medium importance on the default CPU.
 */
3694 KeInitializeDpc(kdpc *dpc, void *dpcfunc, void *dpcctx)
3700 dpc->k_deferedfunc = dpcfunc;
3701 dpc->k_deferredctx = dpcctx;
3702 dpc->k_num = KDPC_CPU_DEFAULT;
3703 dpc->k_importance = KDPC_IMPORTANCE_MEDIUM;
3704 InitializeListHead((&dpc->k_dpclistentry));
/*
 * KeInsertQueueDpc: enqueue a DPC and wake the worker thread.  With
 * NTOSKRNL_MULTIPLE_DPCS the target queue is the scheduling CPU's
 * (or the one chosen via KeSetTargetProcessorDpc) and the IRQL is
 * raised explicitly; the single-queue build just takes the spinlock.
 */
3708 KeInsertQueueDpc(kdpc *dpc, void *sysarg1, void *sysarg2)
3719 #ifdef NTOSKRNL_MULTIPLE_DPCS
3720 KeRaiseIrql(DISPATCH_LEVEL, &irql);
3723 * By default, the DPC is queued to run on the same CPU
3724 * that scheduled it.
3727 if (dpc->k_num == KDPC_CPU_DEFAULT)
3728 kq += curthread->td_oncpu;
3731 KeAcquireSpinLockAtDpcLevel(&kq->kq_lock);
3733 KeAcquireSpinLock(&kq->kq_lock, &irql);
3736 r = ntoskrnl_insert_dpc(&kq->kq_disp, dpc);
/* Arguments are recorded for the worker's MSCALL4 dispatch. */
3738 dpc->k_sysarg1 = sysarg1;
3739 dpc->k_sysarg2 = sysarg2;
3741 KeReleaseSpinLock(&kq->kq_lock, irql);
3746 KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE);
/*
 * KeRemoveQueueDpc: dequeue a pending DPC.  A self-referential list
 * entry means "not queued" (that is how the worker and KeInitializeDpc
 * leave it), in which case the lock is dropped without unlinking.
 */
3752 KeRemoveQueueDpc(kdpc *dpc)
3760 #ifdef NTOSKRNL_MULTIPLE_DPCS
3761 KeRaiseIrql(DISPATCH_LEVEL, &irql);
3763 kq = kq_queues + dpc->k_num;
3765 KeAcquireSpinLockAtDpcLevel(&kq->kq_lock);
3768 KeAcquireSpinLock(&kq->kq_lock, &irql);
3771 if (dpc->k_dpclistentry.nle_flink == &dpc->k_dpclistentry) {
3772 KeReleaseSpinLockFromDpcLevel(&kq->kq_lock);
3777 RemoveEntryList((&dpc->k_dpclistentry));
3778 InitializeListHead((&dpc->k_dpclistentry));
3780 KeReleaseSpinLock(&kq->kq_lock, irql);
/* Reject importance values outside the three defined levels. */
3786 KeSetImportanceDpc(kdpc *dpc, uint32_t imp)
3788 if (imp != KDPC_IMPORTANCE_LOW &&
3789 imp != KDPC_IMPORTANCE_MEDIUM &&
3790 imp != KDPC_IMPORTANCE_HIGH)
3793 dpc->k_importance = (uint8_t)imp;
/* Target-CPU setter; assignment to dpc->k_num is elided here. */
3797 KeSetTargetProcessorDpc(kdpc *dpc, uint8_t cpu)
/*
 * KeFlushQueuedDpcs: kick every DPC worker and block on its kq_done
 * event until each queue has drained.
 */
3806 KeFlushQueuedDpcs(void)
3812 * Poke each DPC queue and wait
3813 * for them to drain.
3816 #ifdef NTOSKRNL_MULTIPLE_DPCS
3817 for (i = 0; i < ncpus; i++) {
3819 for (i = 0; i < 1; i++) {
3822 KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE);
3823 KeWaitForSingleObject(&kq->kq_done, 0, 0, TRUE, NULL);
/* Current CPU id, from the per-thread globaldata. */
3828 KeGetCurrentProcessorNumber(void)
3830 return (curthread->td_gd->gd_cpuid);
/*
 * KeSetTimerEx: (re)arm a timer.  duetime follows NT conventions in
 * 100ns units: negative means relative to now, positive is an
 * absolute system time compared against ntoskrnl_time().  period is
 * in milliseconds for periodic re-firing.  An already-armed timer is
 * first cancelled; the pending flag presumably becomes the return
 * value (final return elided in this excerpt).
 */
3834 KeSetTimerEx(ktimer *timer, int64_t duetime, uint32_t period, kdpc *dpc)
3843 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3845 if (timer->k_header.dh_inserted == TRUE) {
3846 ntoskrnl_remove_timer(timer);
3847 #ifdef NTOSKRNL_DEBUG_TIMERS
3848 ntoskrnl_timer_cancels++;
3850 timer->k_header.dh_inserted = FALSE;
3855 timer->k_duetime = duetime;
3856 timer->k_period = period;
3857 timer->k_header.dh_sigstate = FALSE;
/* Relative deadline: negate and split into seconds/microseconds. */
3861 tv.tv_sec = - (duetime) / 10000000;
3862 tv.tv_usec = (- (duetime) / 10) -
3863 (tv.tv_sec * 1000000);
/* Absolute deadline: fire immediately if already in the past. */
3865 ntoskrnl_time(&curtime);
3866 if (duetime < curtime)
3867 tv.tv_sec = tv.tv_usec = 0;
3869 tv.tv_sec = ((duetime) - curtime) / 10000000;
3870 tv.tv_usec = ((duetime) - curtime) / 10 -
3871 (tv.tv_sec * 1000000);
3875 timer->k_header.dh_inserted = TRUE;
3876 ntoskrnl_insert_timer(timer, tvtohz_high(&tv));
3877 #ifdef NTOSKRNL_DEBUG_TIMERS
3878 ntoskrnl_timer_sets++;
3881 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/* KeSetTimer: one-shot convenience wrapper (period == 0). */
3887 KeSetTimer(ktimer *timer, int64_t duetime, kdpc *dpc)
3889 return (KeSetTimerEx(timer, duetime, 0, dpc));
3893 * The Windows DDK documentation seems to say that cancelling
3894 * a timer that has a DPC will result in the DPC also being
3895 * cancelled, but this isn't really the case.
/*
 * KeCancelTimer: stop a pending timer under the dispatcher lock.
 * The captured 'pending' flag is presumably the return value,
 * matching the NT contract (final return elided in this excerpt).
 */
3899 KeCancelTimer(ktimer *timer)
3906 lockmgr(&ntoskrnl_dispatchlock, LK_EXCLUSIVE);
3908 pending = timer->k_header.dh_inserted;
3910 if (timer->k_header.dh_inserted == TRUE) {
3911 timer->k_header.dh_inserted = FALSE;
3912 ntoskrnl_remove_timer(timer);
3913 #ifdef NTOSKRNL_DEBUG_TIMERS
3914 ntoskrnl_timer_cancels++;
3918 lockmgr(&ntoskrnl_dispatchlock, LK_RELEASE);
/* Unlocked read of the timer's signalled state. */
3924 KeReadStateTimer(ktimer *timer)
3926 return (timer->k_header.dh_sigstate);
/*
 * KeDelayExecutionThread: sleep for *interval (NT 100ns-unit relative
 * or absolute time) by arming a local timer and waiting on it.  Only
 * kernel wait mode is accepted.
 */
3930 KeDelayExecutionThread(uint8_t wait_mode, uint8_t alertable, int64_t *interval)
3935 panic("invalid wait_mode %d", wait_mode);
3937 KeInitializeTimer(&timer);
3938 KeSetTimer(&timer, *interval, NULL);
3939 KeWaitForSingleObject(&timer, 0, 0, alertable, NULL);
3941 return STATUS_SUCCESS;
/*
 * KeQueryInterruptTime: uptime converted to ticks and scaled to
 * 100ns units (10000000/hz per tick, rounded up).
 */
3945 KeQueryInterruptTime(void)
3950 getmicrouptime(&tv);
3952 ticks = tvtohz_high(&tv);
3954 return ticks * ((10000000 + hz - 1) / hz);
/* KeGetCurrentThread: body elided; presumably returns curthread. */
3957 static struct thread *
3958 KeGetCurrentThread(void)
/*
 * KeSetPriorityThread: map the three NT realtime priority classes
 * onto LWKT priorities.  The previous class is derived from the
 * thread's current td_pri and returned (final return elided in this
 * excerpt -- presumably 'old').
 */
3965 KeSetPriorityThread(struct thread *td, int32_t pri)
3970 return LOW_REALTIME_PRIORITY;
3972 if (td->td_pri >= TDPRI_INT_HIGH)
3973 old = HIGH_PRIORITY;
3974 else if (td->td_pri <= TDPRI_IDLE_WORK)
3977 old = LOW_REALTIME_PRIORITY;
3979 if (pri == HIGH_PRIORITY)
3980 lwkt_setpri(td, TDPRI_INT_HIGH);
3981 if (pri == LOW_REALTIME_PRIORITY)
3982 lwkt_setpri(td, TDPRI_SOFT_TIMER);
3983 if (pri == LOW_PRIORITY)
3984 lwkt_setpri(td, TDPRI_IDLE_WORK);
/* Catch-all stub patched in for unimplemented imports; just logs. */
3992 kprintf("ntoskrnl dummy called...\n");
3996 image_patch_table ntoskrnl_functbl[] = {
3997 IMPORT_SFUNC(RtlZeroMemory, 2),
3998 IMPORT_SFUNC(RtlSecureZeroMemory, 2),
3999 IMPORT_SFUNC(RtlFillMemory, 3),
4000 IMPORT_SFUNC(RtlMoveMemory, 3),
4001 IMPORT_SFUNC(RtlCharToInteger, 3),
4002 IMPORT_SFUNC(RtlCopyMemory, 3),
4003 IMPORT_SFUNC(RtlCopyString, 2),
4004 IMPORT_SFUNC(RtlCompareMemory, 3),
4005 IMPORT_SFUNC(RtlEqualUnicodeString, 3),
4006 IMPORT_SFUNC(RtlCopyUnicodeString, 2),
4007 IMPORT_SFUNC(RtlUnicodeStringToAnsiString, 3),
4008 IMPORT_SFUNC(RtlAnsiStringToUnicodeString, 3),
4009 IMPORT_SFUNC(RtlInitAnsiString, 2),
4010 IMPORT_SFUNC_MAP(RtlInitString, RtlInitAnsiString, 2),
4011 IMPORT_SFUNC(RtlInitUnicodeString, 2),
4012 IMPORT_SFUNC(RtlFreeAnsiString, 1),
4013 IMPORT_SFUNC(RtlFreeUnicodeString, 1),
4014 IMPORT_SFUNC(RtlUnicodeStringToInteger, 3),
4015 IMPORT_CFUNC_MAP(sprintf, ksprintf, 0),
4016 IMPORT_CFUNC_MAP(vsprintf, kvsprintf, 0),
4017 IMPORT_CFUNC_MAP(_snprintf, ksnprintf, 0),
4018 IMPORT_CFUNC_MAP(_vsnprintf, kvsnprintf, 0),
4019 IMPORT_CFUNC(DbgPrint, 0),
4020 IMPORT_SFUNC(DbgBreakPoint, 0),
4021 IMPORT_SFUNC(KeBugCheckEx, 5),
4022 IMPORT_CFUNC(strncmp, 0),
4023 IMPORT_CFUNC(strcmp, 0),
4024 IMPORT_CFUNC_MAP(stricmp, strcasecmp, 0),
4025 IMPORT_CFUNC(strncpy, 0),
4026 IMPORT_CFUNC(strcpy, 0),
4027 IMPORT_CFUNC(strlen, 0),
4028 IMPORT_CFUNC_MAP(toupper, ntoskrnl_toupper, 0),
4029 IMPORT_CFUNC_MAP(tolower, ntoskrnl_tolower, 0),
4030 IMPORT_CFUNC_MAP(strstr, ntoskrnl_strstr, 0),
4031 IMPORT_CFUNC_MAP(strncat, ntoskrnl_strncat, 0),
4032 IMPORT_CFUNC_MAP(strchr, index, 0),
4033 IMPORT_CFUNC_MAP(strrchr, rindex, 0),
4034 IMPORT_CFUNC(memcpy, 0),
4035 IMPORT_CFUNC_MAP(memmove, ntoskrnl_memmove, 0),
4036 IMPORT_CFUNC_MAP(memset, ntoskrnl_memset, 0),
4037 IMPORT_CFUNC_MAP(memchr, ntoskrnl_memchr, 0),
4038 IMPORT_SFUNC(IoAllocateDriverObjectExtension, 4),
4039 IMPORT_SFUNC(IoGetDriverObjectExtension, 2),
4040 IMPORT_FFUNC(IofCallDriver, 2),
4041 IMPORT_FFUNC(IofCompleteRequest, 2),
4042 IMPORT_SFUNC(IoAcquireCancelSpinLock, 1),
4043 IMPORT_SFUNC(IoReleaseCancelSpinLock, 1),
4044 IMPORT_SFUNC(IoCancelIrp, 1),
4045 IMPORT_SFUNC(IoConnectInterrupt, 11),
4046 IMPORT_SFUNC(IoDisconnectInterrupt, 1),
4047 IMPORT_SFUNC(IoCreateDevice, 7),
4048 IMPORT_SFUNC(IoDeleteDevice, 1),
4049 IMPORT_SFUNC(IoGetAttachedDevice, 1),
4050 IMPORT_SFUNC(IoAttachDeviceToDeviceStack, 2),
4051 IMPORT_SFUNC(IoDetachDevice, 1),
4052 IMPORT_SFUNC(IoBuildSynchronousFsdRequest, 7),
4053 IMPORT_SFUNC(IoBuildAsynchronousFsdRequest, 6),
4054 IMPORT_SFUNC(IoBuildDeviceIoControlRequest, 9),
4055 IMPORT_SFUNC(IoAllocateIrp, 2),
4056 IMPORT_SFUNC(IoReuseIrp, 2),
4057 IMPORT_SFUNC(IoMakeAssociatedIrp, 2),
4058 IMPORT_SFUNC(IoFreeIrp, 1),
4059 IMPORT_SFUNC(IoInitializeIrp, 3),
4060 IMPORT_SFUNC(KeAcquireInterruptSpinLock, 1),
4061 IMPORT_SFUNC(KeReleaseInterruptSpinLock, 2),
4062 IMPORT_SFUNC(KeSynchronizeExecution, 3),
4063 IMPORT_SFUNC(KeWaitForSingleObject, 5),
4064 IMPORT_SFUNC(KeWaitForMultipleObjects, 8),
4065 IMPORT_SFUNC(_allmul, 4),
4066 IMPORT_SFUNC(_alldiv, 4),
4067 IMPORT_SFUNC(_allrem, 4),
4068 IMPORT_RFUNC(_allshr, 0),
4069 IMPORT_RFUNC(_allshl, 0),
4070 IMPORT_SFUNC(_aullmul, 4),
4071 IMPORT_SFUNC(_aulldiv, 4),
4072 IMPORT_SFUNC(_aullrem, 4),
4073 IMPORT_RFUNC(_aullshr, 0),
4074 IMPORT_RFUNC(_aullshl, 0),
4075 IMPORT_CFUNC(atoi, 0),
4076 IMPORT_CFUNC(atol, 0),
4077 IMPORT_CFUNC(rand, 0),
4078 IMPORT_CFUNC(srand, 0),
4079 IMPORT_SFUNC(WRITE_REGISTER_USHORT, 2),
4080 IMPORT_SFUNC(READ_REGISTER_USHORT, 1),
4081 IMPORT_SFUNC(WRITE_REGISTER_ULONG, 2),
4082 IMPORT_SFUNC(READ_REGISTER_ULONG, 1),
4083 IMPORT_SFUNC(READ_REGISTER_UCHAR, 1),
4084 IMPORT_SFUNC(WRITE_REGISTER_UCHAR, 2),
4085 IMPORT_SFUNC(ExInitializePagedLookasideList, 7),
4086 IMPORT_SFUNC(ExDeletePagedLookasideList, 1),
4087 IMPORT_SFUNC(ExInitializeNPagedLookasideList, 7),
4088 IMPORT_SFUNC(ExDeleteNPagedLookasideList, 1),
4089 IMPORT_FFUNC(InterlockedPopEntrySList, 1),
4090 IMPORT_FFUNC(InitializeSListHead, 1),
4091 IMPORT_FFUNC(InterlockedPushEntrySList, 2),
4092 IMPORT_SFUNC(ExQueryDepthSList, 1),
4093 IMPORT_FFUNC_MAP(ExpInterlockedPopEntrySList,
4094 InterlockedPopEntrySList, 1),
4095 IMPORT_FFUNC_MAP(ExpInterlockedPushEntrySList,
4096 InterlockedPushEntrySList, 2),
4097 IMPORT_FFUNC(ExInterlockedPopEntrySList, 2),
4098 IMPORT_FFUNC(ExInterlockedPushEntrySList, 3),
4099 IMPORT_SFUNC(ExAllocatePoolWithTag, 3),
4100 IMPORT_SFUNC(ExFreePoolWithTag, 2),
4101 IMPORT_SFUNC(ExFreePool, 1),
4103 * For AMD64, we can get away with just mapping
4104 * KeAcquireSpinLockRaiseToDpc() directly to KfAcquireSpinLock()
4105 * because the calling conventions end up being the same.
4107 IMPORT_SFUNC(KeAcquireSpinLockAtDpcLevel, 1),
4108 IMPORT_SFUNC(KeReleaseSpinLockFromDpcLevel, 1),
4109 IMPORT_SFUNC_MAP(KeAcquireSpinLockRaiseToDpc, KfAcquireSpinLock, 1),
4110 IMPORT_SFUNC_MAP(KeReleaseSpinLock, KfReleaseSpinLock, 1),
4111 IMPORT_FFUNC(InterlockedIncrement, 1),
4112 IMPORT_FFUNC(InterlockedDecrement, 1),
4113 IMPORT_FFUNC(InterlockedExchange, 2),
4114 IMPORT_FFUNC(ExInterlockedAddLargeStatistic, 2),
4115 IMPORT_SFUNC(IoAllocateMdl, 5),
4116 IMPORT_SFUNC(IoFreeMdl, 1),
4117 IMPORT_SFUNC(MmAllocateContiguousMemory, 2 + 1),
4118 IMPORT_SFUNC(MmAllocateContiguousMemorySpecifyCache, 5 + 3),
4119 IMPORT_SFUNC(MmFreeContiguousMemory, 1),
4120 IMPORT_SFUNC(MmFreeContiguousMemorySpecifyCache, 3),
4121 IMPORT_SFUNC(MmSizeOfMdl, 1),
4122 IMPORT_SFUNC(MmMapLockedPages, 2),
4123 IMPORT_SFUNC(MmMapLockedPagesSpecifyCache, 6),
4124 IMPORT_SFUNC(MmUnmapLockedPages, 2),
4125 IMPORT_SFUNC(MmBuildMdlForNonPagedPool, 1),
4126 IMPORT_SFUNC(MmGetPhysicalAddress, 1),
4127 IMPORT_SFUNC(MmGetSystemRoutineAddress, 1),
4128 IMPORT_SFUNC(MmIsAddressValid, 1),
4129 IMPORT_SFUNC(MmMapIoSpace, 3 + 1),
4130 IMPORT_SFUNC(MmUnmapIoSpace, 2),
4131 IMPORT_SFUNC(KeInitializeSpinLock, 1),
4132 IMPORT_SFUNC(IoIsWdmVersionAvailable, 2),
4133 IMPORT_SFUNC(IoOpenDeviceRegistryKey, 4),
4134 IMPORT_SFUNC(IoGetDeviceObjectPointer, 4),
4135 IMPORT_SFUNC(IoGetDeviceProperty, 5),
4136 IMPORT_SFUNC(IoAllocateWorkItem, 1),
4137 IMPORT_SFUNC(IoFreeWorkItem, 1),
4138 IMPORT_SFUNC(IoQueueWorkItem, 4),
4139 IMPORT_SFUNC(ExQueueWorkItem, 2),
4140 IMPORT_SFUNC(ntoskrnl_workitem, 2),
4141 IMPORT_SFUNC(KeInitializeMutex, 2),
4142 IMPORT_SFUNC(KeReleaseMutex, 2),
4143 IMPORT_SFUNC(KeReadStateMutex, 1),
4144 IMPORT_SFUNC(KeInitializeEvent, 3),
4145 IMPORT_SFUNC(KeSetEvent, 3),
4146 IMPORT_SFUNC(KeResetEvent, 1),
4147 IMPORT_SFUNC(KeClearEvent, 1),
4148 IMPORT_SFUNC(KeReadStateEvent, 1),
4149 IMPORT_SFUNC(KeInitializeTimer, 1),
4150 IMPORT_SFUNC(KeInitializeTimerEx, 2),
4151 IMPORT_SFUNC(KeSetTimer, 3),
4152 IMPORT_SFUNC(KeSetTimerEx, 4),
4153 IMPORT_SFUNC(KeCancelTimer, 1),
4154 IMPORT_SFUNC(KeReadStateTimer, 1),
4155 IMPORT_SFUNC(KeInitializeDpc, 3),
4156 IMPORT_SFUNC(KeInsertQueueDpc, 3),
4157 IMPORT_SFUNC(KeRemoveQueueDpc, 1),
4158 IMPORT_SFUNC(KeSetImportanceDpc, 2),
4159 IMPORT_SFUNC(KeSetTargetProcessorDpc, 2),
4160 IMPORT_SFUNC(KeFlushQueuedDpcs, 0),
4161 IMPORT_SFUNC(KeGetCurrentProcessorNumber, 1),
4162 IMPORT_SFUNC(ObReferenceObjectByHandle, 6),
4163 IMPORT_FFUNC(ObfDereferenceObject, 1),
4164 IMPORT_SFUNC(ZwClose, 1),
4165 IMPORT_SFUNC(PsCreateSystemThread, 7),
4166 IMPORT_SFUNC(PsTerminateSystemThread, 1),
4167 IMPORT_SFUNC(IoWMIRegistrationControl, 2),
4168 IMPORT_SFUNC(WmiQueryTraceInformation, 5),
4169 IMPORT_CFUNC(WmiTraceMessage, 0),
4170 IMPORT_SFUNC(KeQuerySystemTime, 1),
4171 IMPORT_CFUNC(KeTickCount, 0),
4172 IMPORT_SFUNC(KeDelayExecutionThread, 3),
4173 IMPORT_SFUNC(KeQueryInterruptTime, 0),
4174 IMPORT_SFUNC(KeGetCurrentThread, 0),
4175 IMPORT_SFUNC(KeSetPriorityThread, 2),
4178 * This last entry is a catch-all for any function we haven't
4179 * implemented yet. The PE import list patching routine will
4180 * use it for any function that doesn't have an explicit match
4184 { NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL },
4188 { NULL, NULL, NULL }