2 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com> All rights reserved.
3 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Terrence R. Lambert
4 * cdevsw from kern/kern_conf.c Copyright (c) 1995 Julian R. Elishcer,
6 * Copyright (c) 1982, 1986, 1991, 1993
7 * The Regents of the University of California. All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * $DragonFly: src/sys/kern/kern_device.c,v 1.27 2007/07/23 18:59:50 dillon Exp $
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/sysctl.h>
36 #include <sys/systm.h>
37 #include <sys/module.h>
38 #include <sys/malloc.h>
42 #include <sys/vnode.h>
43 #include <sys/queue.h>
44 #include <sys/device.h>
46 #include <sys/syslink_rpc.h>
48 #include <machine/stdarg.h>
49 #include <sys/thread2.h>
50 #include <vfs/devfs/devfs.h>
53 * system link descriptors identify the command in the
54 * arguments structure.
/*
 * DDESCNAME(name) pastes together the identifier dev_NAME_desc.
 * DEVOP_DESC_INIT(name) begins the definition of the struct syslink_desc
 * carrying that identifier; the initializer visible here is the byte
 * offset of the d_NAME member inside struct dev_ops.  (The remaining
 * macro lines are absent from this listing.)
 *
 * One descriptor is instantiated per device operation below.  The
 * dev_d*() wrappers later in the file store the address of the matching
 * descriptor in a_head.a_desc, and dev_doperate() reads sd_offset from
 * it to locate the operation's slot in a dev_ops.
 */
56 #define DDESCNAME(name) __CONCAT(__CONCAT(dev_,name),_desc)
58 #define DEVOP_DESC_INIT(name) \
59 struct syslink_desc DDESCNAME(name) = { \
60 __offsetof(struct dev_ops, __CONCAT(d_, name)), \
63 DEVOP_DESC_INIT(default);
64 DEVOP_DESC_INIT(open);
65 DEVOP_DESC_INIT(close);
66 DEVOP_DESC_INIT(read);
67 DEVOP_DESC_INIT(write);
68 DEVOP_DESC_INIT(ioctl);
69 DEVOP_DESC_INIT(dump);
70 DEVOP_DESC_INIT(psize);
71 DEVOP_DESC_INIT(poll);
72 DEVOP_DESC_INIT(mmap);
73 DEVOP_DESC_INIT(strategy);
74 DEVOP_DESC_INIT(kqfilter);
75 DEVOP_DESC_INIT(revoke);
76 DEVOP_DESC_INIT(clone);
/*
 * Ops structure with no initializer in this file section.
 * dev_ops_remove() compares its ops argument against the address of this
 * object and skips destroy_all_devs() when they are equal.
 */
81 struct dev_ops dead_dev_ops;
/*
 * Table of fallback operations.  compile_dev_ops() computes the address
 * of the slot in this structure that corresponds to each slot of the ops
 * being compiled.  Only some of the initializers are present in this
 * listing.
 */
83 struct dev_ops default_dev_ops = {
85 .d_default = NULL, /* must be NULL */
93 .d_strategy = nostrategy,
96 .d_kqfilter = nokqfilter,
101 /************************************************************************
102 * GENERAL DEVICE API FUNCTIONS *
103 ************************************************************************/
/*
 * Open a device.  Fills a dev_open_args with the open descriptor, the
 * target device, the open flags and the device type, then returns the
 * result of the device's d_open operation.
 *
 * NOTE(review): no visible line copies cred into the arguments; the
 * listing skips line 114, so confirm against the complete source.
 */
106 dev_dopen(cdev_t dev, int oflags, int devtype, struct ucred *cred)
108 struct dev_open_args ap;
110 ap.a_head.a_desc = &dev_open_desc;
111 ap.a_head.a_dev = dev;
112 ap.a_oflags = oflags;
113 ap.a_devtype = devtype;
115 return(dev->si_ops->d_open(&ap));
/*
 * Close a device.  Fills a dev_close_args with the close descriptor, the
 * target device and the device type, then returns the result of the
 * device's d_close operation.
 *
 * NOTE(review): no visible line stores fflag; the listing skips line
 * 125, so confirm against the complete source.
 */
119 dev_dclose(cdev_t dev, int fflag, int devtype)
121 struct dev_close_args ap;
123 ap.a_head.a_desc = &dev_close_desc;
124 ap.a_head.a_dev = dev;
126 ap.a_devtype = devtype;
127 return(dev->si_ops->d_close(&ap));
/*
 * Read from a device.  Sets up a dev_read_args (descriptor, device,
 * ioflag), stores the result of the device's d_read operation in error,
 * and records time_second in dev->si_lastread.
 *
 * NOTE(review): the store of uio, any condition guarding the
 * si_lastread update, and the return statement are not present in this
 * listing.
 */
131 dev_dread(cdev_t dev, struct uio *uio, int ioflag)
133 struct dev_read_args ap;
136 ap.a_head.a_desc = &dev_read_desc;
137 ap.a_head.a_dev = dev;
139 ap.a_ioflag = ioflag;
140 error = dev->si_ops->d_read(&ap);
142 dev->si_lastread = time_second;
/*
 * Write to a device.  Records time_second in dev->si_lastwrite before
 * dispatching, sets up a dev_write_args (descriptor, device, ioflag) and
 * stores the result of the device's d_write operation in error.
 *
 * NOTE(review): the store of uio and the return statement are not
 * present in this listing.
 */
147 dev_dwrite(cdev_t dev, struct uio *uio, int ioflag)
149 struct dev_write_args ap;
152 dev->si_lastwrite = time_second;
153 ap.a_head.a_desc = &dev_write_desc;
154 ap.a_head.a_dev = dev;
156 ap.a_ioflag = ioflag;
157 error = dev->si_ops->d_write(&ap);
/*
 * Issue an ioctl on a device.  Sets the ioctl descriptor and target
 * device in a dev_ioctl_args and returns the result of the device's
 * d_ioctl operation.
 *
 * NOTE(review): the lines that would store cmd, data, fflag and cred
 * (listing lines 168-171) are absent here.
 */
162 dev_dioctl(cdev_t dev, u_long cmd, caddr_t data, int fflag, struct ucred *cred)
164 struct dev_ioctl_args ap;
166 ap.a_head.a_desc = &dev_ioctl_desc;
167 ap.a_head.a_dev = dev;
172 return(dev->si_ops->d_ioctl(&ap));
/*
 * Poll a device.  Sets up a dev_poll_args (descriptor, device, events)
 * and stores the result of the device's d_poll operation in error.  One
 * exit path returns seltrue(dev, events).
 *
 * NOTE(review): the condition selecting the seltrue() path and the
 * other return statement are not present in this listing.
 */
176 dev_dpoll(cdev_t dev, int events)
178 struct dev_poll_args ap;
181 ap.a_head.a_desc = &dev_poll_desc;
182 ap.a_head.a_dev = dev;
183 ap.a_events = events;
184 error = dev->si_ops->d_poll(&ap);
187 return (seltrue(dev, events));
/*
 * Memory-map request for a device.  Sets up a dev_mmap_args (descriptor,
 * device, offset) and stores the result of the device's d_mmap operation
 * in error.
 *
 * NOTE(review): the store of nprot and the value ultimately returned are
 * not present in this listing.
 */
191 dev_dmmap(cdev_t dev, vm_offset_t offset, int nprot)
193 struct dev_mmap_args ap;
196 ap.a_head.a_desc = &dev_mmap_desc;
197 ap.a_head.a_dev = dev;
198 ap.a_offset = offset;
200 error = dev->si_ops->d_mmap(&ap);
/*
 * Clone request for a device.  Sets the clone descriptor and the target
 * device in a dev_clone_args and returns the result of the device's
 * d_clone operation.
 */
207 dev_dclone(cdev_t dev)
209 struct dev_clone_args ap;
211 ap.a_head.a_desc = &dev_clone_desc;
212 ap.a_head.a_dev = dev;
213 return (dev->si_ops->d_clone(&ap));
/*
 * Revoke request for a device.  Sets the revoke descriptor and the
 * target device in a dev_revoke_args and returns the result of the
 * device's d_revoke operation.
 */
217 dev_drevoke(cdev_t dev)
219 struct dev_revoke_args ap;
221 ap.a_head.a_desc = &dev_revoke_desc;
222 ap.a_head.a_dev = dev;
223 return (dev->si_ops->d_revoke(&ap));
227 * Core device strategy call, used to issue I/O on a device. There are
228 * two versions, a non-chained version and a chained version. The chained
229 * version reuses a BIO set up by vn_strategy(). The only difference is
230 * that, for now, we do not push a new tracking structure when chaining
231 * from vn_strategy. XXX this will ultimately have to change.
/*
 * Non-chained strategy entry point.
 *
 * Asserts that the bio has no tracking structure attached yet and that
 * the buffer command is not BUF_CMD_DONE.  A tracking structure is then
 * chosen from the device (si_track_read when the command is
 * BUF_CMD_READ; si_track_write is the alternative), referenced with
 * bio_track_ref() and attached to the bio.  After asserting that
 * BIO_DONE is clear, the device's d_strategy operation is invoked and
 * its return value is explicitly discarded.
 *
 * NOTE(review): the line that stores bio into the argument structure is
 * not present in this listing.
 */
234 dev_dstrategy(cdev_t dev, struct bio *bio)
236 struct dev_strategy_args ap;
237 struct bio_track *track;
239 ap.a_head.a_desc = &dev_strategy_desc;
240 ap.a_head.a_dev = dev;
243 KKASSERT(bio->bio_track == NULL);
244 KKASSERT(bio->bio_buf->b_cmd != BUF_CMD_DONE);
245 if (bio->bio_buf->b_cmd == BUF_CMD_READ)
246 track = &dev->si_track_read;
248 track = &dev->si_track_write;
249 bio_track_ref(track);
250 bio->bio_track = track;
251 KKASSERT((bio->bio_flags & BIO_DONE) == 0);
252 (void)dev->si_ops->d_strategy(&ap);
/*
 * Chained strategy entry point.
 *
 * Unlike dev_dstrategy(), this asserts that the bio already carries a
 * tracking structure and takes no new reference.  After asserting that
 * BIO_DONE is clear, the device's d_strategy operation is invoked and
 * its return value is explicitly discarded.
 *
 * NOTE(review): the line that stores bio into the argument structure is
 * not present in this listing.
 */
256 dev_dstrategy_chain(cdev_t dev, struct bio *bio)
258 struct dev_strategy_args ap;
260 ap.a_head.a_desc = &dev_strategy_desc;
261 ap.a_head.a_dev = dev;
264 KKASSERT(bio->bio_track != NULL);
265 KKASSERT((bio->bio_flags & BIO_DONE) == 0);
266 (void)dev->si_ops->d_strategy(&ap);
270 * note: the disk layer is expected to set count, blkno, and secsize before
271 * forwarding the message.
/*
 * Dump request for a device.  Sets the dump descriptor and the target
 * device in a dev_dump_args and returns the result of the device's
 * d_dump operation.
 *
 * NOTE(review): listing lines 280-282 are absent, so any other fields
 * initialised here cannot be seen.
 */
274 dev_ddump(cdev_t dev)
276 struct dev_dump_args ap;
278 ap.a_head.a_desc = &dev_dump_desc;
279 ap.a_head.a_dev = dev;
283 return(dev->si_ops->d_dump(&ap));
/*
 * Size query for a device.  Sets the psize descriptor and the target
 * device in a dev_psize_args and stores the result of the device's
 * d_psize operation in error.  One exit path returns ap.a_result, the
 * value filled in by the operation.
 *
 * NOTE(review): the test on error and the alternative return value are
 * not present in this listing.
 */
287 dev_dpsize(cdev_t dev)
289 struct dev_psize_args ap;
292 ap.a_head.a_desc = &dev_psize_desc;
293 ap.a_head.a_dev = dev;
294 error = dev->si_ops->d_psize(&ap);
296 return (ap.a_result);
/*
 * kqueue filter request for a device.  Sets the kqfilter descriptor and
 * the target device in a dev_kqfilter_args and stores the result of the
 * device's d_kqfilter operation in error.
 *
 * NOTE(review): the store of kn and the value ultimately returned are
 * not present in this listing.
 */
301 dev_dkqfilter(cdev_t dev, struct knote *kn)
303 struct dev_kqfilter_args ap;
306 ap.a_head.a_desc = &dev_kqfilter_desc;
307 ap.a_head.a_dev = dev;
309 error = dev->si_ops->d_kqfilter(&ap);
315 /************************************************************************
316 * DEVICE HELPER FUNCTIONS *
317 ************************************************************************/
/*
 * Return the refcnt field of the device's si_sysref member.
 */
323 dev_drefs(cdev_t dev)
325 return(dev->si_sysref.refcnt);
/*
 * Return the name recorded in the head of the device's ops structure.
 */
332 dev_dname(cdev_t dev)
334 return(dev->si_ops->head.name);
/*
 * Return the flags recorded in the head of the device's ops structure.
 */
341 dev_dflags(cdev_t dev)
343 return(dev->si_ops->head.flags);
352 return(dev->si_ops->head.maj);
356 * Used when forwarding a request through layers. The caller adjusts
357 * ap->a_head.a_dev and then calls this function.
/*
 * Dispatch a generic argument structure through the ops of the device
 * named in it.  The handler is loaded from the device's ops structure at
 * the byte offset given by the descriptor (ap->a_desc->sd_offset).
 *
 * NOTE(review): the call through func and the return statement are not
 * present in this listing.
 */
360 dev_doperate(struct dev_generic_args *ap)
362 int (*func)(struct dev_generic_args *);
364 func = *(void **)((char *)ap->a_dev->si_ops + ap->a_desc->sd_offset);
369 * Used by the console intercept code only. Issue an operation through
370 * a foreign ops structure allowing the ops structure associated
371 * with the device to remain intact.
/*
 * Same lookup as dev_doperate(), but the handler is loaded from the
 * caller-supplied ops structure instead of the one attached to the
 * device, at byte offset ap->a_desc->sd_offset.
 *
 * NOTE(review): the call through func and the return statement are not
 * present in this listing.
 */
374 dev_doperate_ops(struct dev_ops *ops, struct dev_generic_args *ap)
376 int (*func)(struct dev_generic_args *);
378 func = *(void **)((char *)ops + ap->a_desc->sd_offset);
383 * Convert a template dev_ops into the real thing by filling in
384 * uninitialized fields.
/*
 * Fill in the unset operation slots of ops.
 *
 * Walks the structure one pointer-sized slot at a time, from the offset
 * of dev_ops_first_field through the offset of dev_ops_last_field
 * inclusive.  func_p addresses the slot in ops and def_p the slot at the
 * same offset in default_dev_ops.  A slot that is NULL can be assigned
 * ops->d_default.
 *
 * NOTE(review): the lines deciding between ops->d_default and the
 * default_dev_ops entry (listing lines 398 and 400-401) are absent, so
 * the exact selection rule cannot be confirmed from here.
 */
387 compile_dev_ops(struct dev_ops *ops)
391 for (offset = offsetof(struct dev_ops, dev_ops_first_field);
392 offset <= offsetof(struct dev_ops, dev_ops_last_field);
393 offset += sizeof(void *)
395 void **func_p = (void **)((char *)ops + offset);
396 void **def_p = (void **)((char *)&default_dev_ops + offset);
397 if (*func_p == NULL) {
399 *func_p = ops->d_default;
406 /************************************************************************
407 * MAJOR/MINOR SPACE FUNCTION *
408 ************************************************************************/
411 * This makes a dev_ops entry visible to userland (e.g. /dev/<blah>).
413 * The kernel can overload a data space by making multiple dev_ops_add()
414 * calls, but only the most recent one in the list matching the mask/match
415 * will be visible to userland.
417 * make_dev() does not automatically call dev_ops_add() (nor do we want it
418 * to, since partition-managed disk devices are overloaded on top of the
421 * Disk devices typically register their major, e.g. 'ad0', and then call
422 * into the disk label management code which overloads its own onto e.g. 'ad0'
423 * to support all the various slice and partition combinations.
425 * The mask/match supplied in this call are a full 32 bits and the same
426 * mask and match must be specified in a later dev_ops_remove() call to
427 * match this add. However, the match value for the minor number should never
428 * have any bits set in the major number's bit range (8-15). The mask value
429 * may be conveniently specified as -1 without creating any major number
/*
 * Ordering function handed to RB_GENERATE2 for the dev_ops_maj tree.
 * It compares the maj fields of the two nodes.
 *
 * NOTE(review): only the greater-than test is present in this listing;
 * the other branches and the returned values are absent.
 */
435 rb_dev_ops_compare(struct dev_ops_maj *a, struct dev_ops_maj *b)
439 else if (a->maj > b->maj)
/*
 * Instantiate the dev_ops_rb_tree routines for struct dev_ops_maj nodes
 * (linked through rbnode, ordered by rb_dev_ops_compare) and define the
 * tree head.  The rest of this file searches the tree with
 * dev_ops_rb_tree_RB_LOOKUP(), passing a major number as the key.
 */
444 RB_GENERATE2(dev_ops_rb_tree, dev_ops_maj, rbnode, rb_dev_ops_compare, int, maj);
446 struct dev_ops_rb_tree dev_ops_rbhead = RB_INITIALIZER(dev_ops_rbhead);
/*
 * Register ops under its major number with the given mask/match pair.
 *
 * Visible steps:
 *  - ops is passed through compile_dev_ops().
 *  - When ops->head.maj is negative, a major is assigned from next_maj
 *    (a function-static counter starting at 256); the loop keeps
 *    probing the tree while next_maj is already present.
 *  - The per-major node is looked up; a zeroed node can be allocated,
 *    given the major and inserted into the tree.
 *  - Existing links on the node are scanned for an identical mask/match
 *    pair, and a warning is printed when that pair belongs to a
 *    different ops.
 *  - A zeroed dev_ops_link is allocated and chained ahead of the node's
 *    current link list.
 *
 * NOTE(review): several lines are absent from this listing, including
 * the loop body that advances next_maj, the test guarding the node
 * allocation, the handling after an exact match, and the initialisation
 * of the new link's fields other than next.
 */
449 dev_ops_add(struct dev_ops *ops, u_int mask, u_int match)
453 static int next_maj = 256; /* first dynamic major number */
454 struct dev_ops_maj *rbmaj;
455 struct dev_ops_link *link;
457 compile_dev_ops(ops);
458 if (ops->head.maj < 0) {
459 while (dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead, next_maj) != NULL) {
463 ops->head.maj = next_maj;
465 rbmaj = dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead, ops->head.maj);
467 rbmaj = kmalloc(sizeof(*rbmaj), M_DEVBUF, M_INTWAIT | M_ZERO);
468 rbmaj->maj = ops->head.maj;
469 dev_ops_rb_tree_RB_INSERT(&dev_ops_rbhead, rbmaj);
471 for (link = rbmaj->link; link; link = link->next) {
473 * If we get an exact match we usurp the target, but we only print
474 * a warning message if a different device switch is installed.
476 if (link->mask == mask && link->match == match) {
477 if (link->ops != ops) {
478 kprintf("WARNING: \"%s\" (%p) is usurping \"%s\"'s"
481 link->ops->head.name, link->ops);
488 * XXX add additional warnings for overlaps
492 link = kmalloc(sizeof(struct dev_ops_link), M_DEVBUF, M_INTWAIT|M_ZERO);
496 link->next = rbmaj->link;
503 * Should only be used by udev2dev().
505 * If the minor number is -1, we match the first ops we find for this
506 * major. If the mask is not -1 then multiple minor numbers can match
509 * Note that this function will return NULL if the minor number is not within
510 * the bounds of the installed mask(s).
512 * The specified minor number should NOT include any major bits.
/*
 * Look up the ops registered for a major/minor pair.
 *
 * x is used as the key for the major-number tree lookup.  y is tested
 * against every link on that node: a link matches when y is -1 or when
 * (link->mask & y) equals link->match.
 *
 * NOTE(review): the handling of a failed tree lookup and the return
 * statements are not present in this listing.
 */
515 dev_ops_get(int x, int y)
517 struct dev_ops_maj *rbmaj;
518 struct dev_ops_link *link;
522 rbmaj = dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead, x);
525 for (link = rbmaj->link; link; link = link->next) {
526 if (y == -1 || (link->mask & y) == link->match)
533 * Remove all matching dev_ops entries from the major number's node in
534 * the dev_ops_rbhead tree so no new user opens can be performed, and destroy all devices
535 * installed in the hash table that are associated with this dev_ops. (see
536 * destroy_all_devs()).
538 * The mask and match should match a previous call to dev_ops_add*().
/*
 * Undo a dev_ops_add() for the given ops and mask/match pair.
 *
 * Visible steps:
 *  - Unless ops is dead_dev_ops, destroy_all_devs(ops, mask, match) is
 *    called first.
 *  - The per-major node is looked up by ops->head.maj; a
 *    "double-remove" message exists for the failure case.
 *  - The node's link list is walked through a pointer-to-pointer
 *    (plink) looking for a link whose mask and match both equal the
 *    arguments; a matching link owned by a different ops produces the
 *    "stolen" error message.
 *  - ops->head.refs is decremented and the link is freed.
 *  - When the node has no links left, it is removed from the tree and
 *    freed.
 *  - A warning is printed if ops->head.refs is still non-zero, and an
 *    "ops removed" message is printed.
 *
 * NOTE(review): many control-flow lines (tests, breaks, the unlink of
 * the matched entry, returns) are absent from this listing, so the
 * exact conditions under which each step runs cannot be confirmed here.
 */
541 dev_ops_remove(struct dev_ops *ops, u_int mask, u_int match)
543 struct dev_ops_maj *rbmaj;
544 struct dev_ops_link *link;
545 struct dev_ops_link **plink;
547 if (ops != &dead_dev_ops)
548 destroy_all_devs(ops, mask, match);
550 rbmaj = dev_ops_rb_tree_RB_LOOKUP(&dev_ops_rbhead, ops->head.maj);
552 kprintf("double-remove of dev_ops %p for %s(%d)\n",
553 ops, ops->head.name, ops->head.maj);
556 for (plink = &rbmaj->link; (link = *plink) != NULL;
557 plink = &link->next) {
558 if (link->mask == mask && link->match == match) {
559 if (link->ops == ops)
561 kprintf("%s: ERROR: cannot remove dev_ops, "
562 "its major number %d was stolen by %s\n",
563 ops->head.name, ops->head.maj,
569 kprintf("%s(%d)[%08x/%08x]: WARNING: ops removed "
571 ops->head.name, ops->head.maj, mask, match);
574 --ops->head.refs; /* XXX ops_release() / record refs */
575 kfree(link, M_DEVBUF);
579 * Scrap the RB tree node for the major number if no ops are
580 * installed any longer.
582 if (rbmaj->link == NULL) {
583 dev_ops_rb_tree_RB_REMOVE(&dev_ops_rbhead, rbmaj);
584 kfree(rbmaj, M_DEVBUF);
589 * The same ops might be used with multiple devices, so don't
590 * complain if the ref count is non-zero.
592 if (ops->head.refs != 0) {
593 kprintf("%s(%d)[%08x/%08x]: Warning: dev_ops_remove() called "
594 "while %d device refs still exist!\n",
595 ops->head.name, ops->head.maj, mask, match,
599 kprintf("%s: ops removed\n", ops->head.name);
/*
 * Forward to devfs_destroy_dev_by_ops() with a minor argument of -1 and
 * return its result.
 */
605 int dev_ops_remove_all(struct dev_ops *ops)
607 return devfs_destroy_dev_by_ops(ops, -1);
/*
 * Forward to devfs_destroy_dev_by_ops() with the given minor number and
 * return its result.
 */
610 int dev_ops_remove_minor(struct dev_ops *ops, int minor)
612 return devfs_destroy_dev_by_ops(ops, minor);
616 * Release an ops entry. When the ref count reaches zero, recurse
/*
 * Tests whether ops->head.refs has reached zero.
 *
 * NOTE(review): any reference-count adjustment before the test and the
 * body of the zero branch are not present in this listing.
 */
620 dev_ops_release(struct dev_ops *ops)
624 if (ops->head.refs == 0) {
/*
 * Prepare iops to stand in for the ops currently attached to dev.
 *
 * The current ops pointer is captured in oops, iops is passed through
 * compile_dev_ops(), and the maj, data and flags head fields are copied
 * from oops into iops.  SI_INTERCEPTED is then set in dev->si_flags.
 *
 * NOTE(review): the line that would install iops in dev->si_ops and the
 * return statement are not present in this listing.
 */
630 dev_ops_intercept(cdev_t dev, struct dev_ops *iops)
632 struct dev_ops *oops = dev->si_ops;
634 compile_dev_ops(iops);
635 iops->head.maj = oops->head.maj;
636 iops->head.data = oops->head.data;
637 iops->head.flags = oops->head.flags;
639 dev->si_flags |= SI_INTERCEPTED;
/*
 * Counterpart of dev_ops_intercept().
 *
 * The ops currently attached to dev are captured in iops,
 * SI_INTERCEPTED is cleared from dev->si_flags, and the data and flags
 * head fields of iops are reset to NULL and 0.
 *
 * NOTE(review): the line that would reinstall oops in dev->si_ops and
 * any other head-field resets are not present in this listing.
 */
645 dev_ops_restore(cdev_t dev, struct dev_ops *oops)
647 struct dev_ops *iops = dev->si_ops;
650 dev->si_flags &= ~SI_INTERCEPTED;
652 iops->head.data = NULL;
653 iops->head.flags = 0;
656 /************************************************************************
657 * DEFAULT DEV OPS FUNCTIONS *
658 ************************************************************************/
662 * Unsupported devswitch functions (e.g. for writing to read-only device).
663 * XXX may belong elsewhere.
666 norevoke(struct dev_revoke_args *ap)
/*
 * Clone handler that returns 0, which the inline comment describes as
 * allowing the clone.
 */
673 noclone(struct dev_clone_args *ap)
676 return (0); /* allow the clone */
680 noopen(struct dev_open_args *ap)
686 noclose(struct dev_close_args *ap)
692 noread(struct dev_read_args *ap)
698 nowrite(struct dev_write_args *ap)
704 noioctl(struct dev_ioctl_args *ap)
710 nokqfilter(struct dev_kqfilter_args *ap)
716 nommap(struct dev_mmap_args *ap)
722 nopoll(struct dev_poll_args *ap)
/*
 * Strategy handler installed as .d_strategy in default_dev_ops.  It
 * marks the bio's buffer as failed by setting B_ERROR in b_flags and
 * EOPNOTSUPP in b_error.
 *
 * NOTE(review): how the bio is completed afterwards and the return
 * value are not present in this listing.
 */
729 nostrategy(struct dev_strategy_args *ap)
731 struct bio *bio = ap->a_bio;
733 bio->bio_buf->b_flags |= B_ERROR;
734 bio->bio_buf->b_error = EOPNOTSUPP;
740 nopsize(struct dev_psize_args *ap)
747 nodump(struct dev_dump_args *ap)
753 * XXX this is probably bogus. Any device that uses it isn't checking the
757 nullopen(struct dev_open_args *ap)
763 nullclose(struct dev_close_args *ap)