proc->thread stage 2: MAJOR revamping of system calls, ucred, jail API,
[dragonfly.git] / sys / kern / subr_rman.c
1 /*
2  * Copyright 1998 Massachusetts Institute of Technology
3  *
4  * Permission to use, copy, modify, and distribute this software and
5  * its documentation for any purpose and without fee is hereby
6  * granted, provided that both the above copyright notice and this
7  * permission notice appear in all copies, that both the above
8  * copyright notice and this permission notice appear in all
9  * supporting documentation, and that the name of M.I.T. not be used
10  * in advertising or publicity pertaining to distribution of the
11  * software without specific, written prior permission.  M.I.T. makes
12  * no representations about the suitability of this software for any
13  * purpose.  It is provided "as is" without express or implied
14  * warranty.
15  * 
16  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD: src/sys/kern/subr_rman.c,v 1.10.2.1 2001/06/05 08:06:08 imp Exp $
30  * $DragonFly: src/sys/kern/subr_rman.c,v 1.2 2003/06/17 04:28:41 dillon Exp $
31  */
32
33 /*
34  * The kernel resource manager.  This code is responsible for keeping track
35  * of hardware resources which are apportioned out to various drivers.
36  * It does not actually assign those resources, and it is not expected
37  * that end-device drivers will call into this code directly.  Rather,
38  * the code which implements the buses that those devices are attached to,
39  * and the code which manages CPU resources, will call this code, and the
40  * end-device drivers will make upcalls to that code to actually perform
41  * the allocation.
42  *
43  * There are two sorts of resources managed by this code.  The first is
44  * the more familiar array (RMAN_ARRAY) type; resources in this class
45  * consist of a sequence of individually-allocatable objects which have
46  * been numbered in some well-defined order.  Most of the resources
47  * are of this type, as it is the most familiar.  The second type is
48  * called a gauge (RMAN_GAUGE), and models fungible resources (i.e.,
49  * resources in which each instance is indistinguishable from every
50  * other instance).  The principal anticipated application of gauges
51  * is in the context of power consumption, where a bus may have a specific
52  * power budget which all attached devices share.  RMAN_GAUGE is not
53  * implemented yet.
54  *
55  * For array resources, we make one simplifying assumption: two clients
56  * sharing the same resource must use the same range of indices.  That
57  * is to say, sharing of overlapping-but-not-identical regions is not
58  * permitted.
59  */
60
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/kernel.h>
64 #include <sys/lock.h>
65 #include <sys/malloc.h>
66 #include <sys/bus.h>            /* XXX debugging */
67 #include <machine/bus.h>
68 #include <sys/rman.h>
69
70 static MALLOC_DEFINE(M_RMAN, "rman", "Resource manager");
71
72 struct  rman_head rman_head;
73 #ifndef NULL_SIMPLELOCKS
74 static  struct simplelock rman_lock; /* mutex to protect rman_head */
75 #endif
76 static  int int_rman_activate_resource(struct rman *rm, struct resource *r,
77                                        struct resource **whohas);
78 static  int int_rman_deactivate_resource(struct resource *r);
79 static  int int_rman_release_resource(struct rman *rm, struct resource *r);
80
81 #define CIRCLEQ_TERMCOND(var, head)     (var == (void *)&(head))
82
83 int
84 rman_init(struct rman *rm)
85 {
86         static int once;
87
88         if (once == 0) {
89                 once = 1;
90                 TAILQ_INIT(&rman_head);
91                 simple_lock_init(&rman_lock);
92         }
93
94         if (rm->rm_type == RMAN_UNINIT)
95                 panic("rman_init");
96         if (rm->rm_type == RMAN_GAUGE)
97                 panic("implement RMAN_GAUGE");
98
99         CIRCLEQ_INIT(&rm->rm_list);
100         rm->rm_slock = malloc(sizeof *rm->rm_slock, M_RMAN, M_NOWAIT);
101         if (rm->rm_slock == 0)
102                 return ENOMEM;
103         simple_lock_init(rm->rm_slock);
104
105         simple_lock(&rman_lock);
106         TAILQ_INSERT_TAIL(&rman_head, rm, rm_link);
107         simple_unlock(&rman_lock);
108         return 0;
109 }
110
111 /*
112  * NB: this interface is not robust against programming errors which
113  * add multiple copies of the same region.
114  */
115 int
116 rman_manage_region(struct rman *rm, u_long start, u_long end)
117 {
118         struct resource *r, *s;
119
120         r = malloc(sizeof *r, M_RMAN, M_NOWAIT);
121         if (r == 0)
122                 return ENOMEM;
123         bzero(r, sizeof *r);
124         r->r_sharehead = 0;
125         r->r_start = start;
126         r->r_end = end;
127         r->r_flags = 0;
128         r->r_dev = 0;
129         r->r_rm = rm;
130
131         simple_lock(rm->rm_slock);
132         for (s = CIRCLEQ_FIRST(&rm->rm_list);   
133              !CIRCLEQ_TERMCOND(s, rm->rm_list) && s->r_end < r->r_start;
134              s = CIRCLEQ_NEXT(s, r_link))
135                 ;
136
137         if (CIRCLEQ_TERMCOND(s, rm->rm_list)) {
138                 CIRCLEQ_INSERT_TAIL(&rm->rm_list, r, r_link);
139         } else {
140                 CIRCLEQ_INSERT_BEFORE(&rm->rm_list, s, r, r_link);
141         }
142
143         simple_unlock(rm->rm_slock);
144         return 0;
145 }
146
147 int
148 rman_fini(struct rman *rm)
149 {
150         struct resource *r;
151
152         simple_lock(rm->rm_slock);
153         CIRCLEQ_FOREACH(r, &rm->rm_list, r_link) {
154                 if (r->r_flags & RF_ALLOCATED) {
155                         simple_unlock(rm->rm_slock);
156                         return EBUSY;
157                 }
158         }
159
160         /*
161          * There really should only be one of these if we are in this
162          * state and the code is working properly, but it can't hurt.
163          */
164         while (!CIRCLEQ_EMPTY(&rm->rm_list)) {
165                 r = CIRCLEQ_FIRST(&rm->rm_list);
166                 CIRCLEQ_REMOVE(&rm->rm_list, r, r_link);
167                 free(r, M_RMAN);
168         }
169         simple_unlock(rm->rm_slock);
170         simple_lock(&rman_lock);
171         TAILQ_REMOVE(&rman_head, rm, rm_link);
172         simple_unlock(&rman_lock);
173         free(rm->rm_slock, M_RMAN);
174
175         return 0;
176 }
177
178 struct resource *
179 rman_reserve_resource(struct rman *rm, u_long start, u_long end, u_long count,
180                       u_int flags, struct device *dev)
181 {
182         u_int   want_activate;
183         struct  resource *r, *s, *rv;
184         u_long  rstart, rend;
185
186         rv = 0;
187
188 #ifdef RMAN_DEBUG
189         printf("rman_reserve_resource: <%s> request: [%#lx, %#lx], length "
190                "%#lx, flags %u, device %s%d\n", rm->rm_descr, start, end,
191                count, flags, device_get_name(dev), device_get_unit(dev));
192 #endif /* RMAN_DEBUG */
193         want_activate = (flags & RF_ACTIVE);
194         flags &= ~RF_ACTIVE;
195
196         simple_lock(rm->rm_slock);
197
198         for (r = CIRCLEQ_FIRST(&rm->rm_list); 
199              !CIRCLEQ_TERMCOND(r, rm->rm_list) && r->r_end < start;
200              r = CIRCLEQ_NEXT(r, r_link))
201                 ;
202
203         if (CIRCLEQ_TERMCOND(r, rm->rm_list)) {
204 #ifdef RMAN_DEBUG
205                 printf("could not find a region\n");
206 #endif RMAN_DEBUG
207                 goto out;
208         }
209
210         /*
211          * First try to find an acceptable totally-unshared region.
212          */
213         for (s = r; !CIRCLEQ_TERMCOND(s, rm->rm_list);
214              s = CIRCLEQ_NEXT(s, r_link)) {
215 #ifdef RMAN_DEBUG
216                 printf("considering [%#lx, %#lx]\n", s->r_start, s->r_end);
217 #endif /* RMAN_DEBUG */
218                 if (s->r_start > end) {
219 #ifdef RMAN_DEBUG
220                         printf("s->r_start (%#lx) > end (%#lx)\n", s->r_start, end);
221 #endif /* RMAN_DEBUG */
222                         break;
223                 }
224                 if (s->r_flags & RF_ALLOCATED) {
225 #ifdef RMAN_DEBUG
226                         printf("region is allocated\n");
227 #endif /* RMAN_DEBUG */
228                         continue;
229                 }
230                 rstart = max(s->r_start, start);
231                 rstart = (rstart + ((1ul << RF_ALIGNMENT(flags))) - 1) &
232                     ~((1ul << RF_ALIGNMENT(flags)) - 1);
233                 rend = min(s->r_end, max(start + count, end));
234 #ifdef RMAN_DEBUG
235                 printf("truncated region: [%#lx, %#lx]; size %#lx (requested %#lx)\n",
236                        rstart, rend, (rend - rstart + 1), count);
237 #endif /* RMAN_DEBUG */
238
239                 if ((rend - rstart + 1) >= count) {
240 #ifdef RMAN_DEBUG
241                         printf("candidate region: [%#lx, %#lx], size %#lx\n",
242                                rend, rstart, (rend - rstart + 1));
243 #endif /* RMAN_DEBUG */
244                         if ((s->r_end - s->r_start + 1) == count) {
245 #ifdef RMAN_DEBUG
246                                 printf("candidate region is entire chunk\n");
247 #endif /* RMAN_DEBUG */
248                                 rv = s;
249                                 rv->r_flags |= RF_ALLOCATED | flags;
250                                 rv->r_dev = dev;
251                                 goto out;
252                         }
253
254                         /*
255                          * If s->r_start < rstart and
256                          *    s->r_end > rstart + count - 1, then
257                          * we need to split the region into three pieces
258                          * (the middle one will get returned to the user).
259                          * Otherwise, we are allocating at either the
260                          * beginning or the end of s, so we only need to
261                          * split it in two.  The first case requires
262                          * two new allocations; the second requires but one.
263                          */
264                         rv = malloc(sizeof *rv, M_RMAN, M_NOWAIT);
265                         if (rv == 0)
266                                 goto out;
267                         bzero(rv, sizeof *rv);
268                         rv->r_start = rstart;
269                         rv->r_end = rstart + count - 1;
270                         rv->r_flags = flags | RF_ALLOCATED;
271                         rv->r_dev = dev;
272                         rv->r_sharehead = 0;
273                         rv->r_rm = rm;
274                         
275                         if (s->r_start < rv->r_start && s->r_end > rv->r_end) {
276 #ifdef RMAN_DEBUG
277                                 printf("splitting region in three parts: "
278                                        "[%#lx, %#lx]; [%#lx, %#lx]; [%#lx, %#lx]\n",
279                                        s->r_start, rv->r_start - 1,
280                                        rv->r_start, rv->r_end,
281                                        rv->r_end + 1, s->r_end);
282 #endif /* RMAN_DEBUG */
283                                 /*
284                                  * We are allocating in the middle.
285                                  */
286                                 r = malloc(sizeof *r, M_RMAN, M_NOWAIT);
287                                 if (r == 0) {
288                                         free(rv, M_RMAN);
289                                         rv = 0;
290                                         goto out;
291                                 }
292                                 bzero(r, sizeof *r);
293                                 r->r_start = rv->r_end + 1;
294                                 r->r_end = s->r_end;
295                                 r->r_flags = s->r_flags;
296                                 r->r_dev = 0;
297                                 r->r_sharehead = 0;
298                                 r->r_rm = rm;
299                                 s->r_end = rv->r_start - 1;
300                                 CIRCLEQ_INSERT_AFTER(&rm->rm_list, s, rv,
301                                                      r_link);
302                                 CIRCLEQ_INSERT_AFTER(&rm->rm_list, rv, r,
303                                                      r_link);
304                         } else if (s->r_start == rv->r_start) {
305 #ifdef RMAN_DEBUG
306                                 printf("allocating from the beginning\n");
307 #endif /* RMAN_DEBUG */
308                                 /*
309                                  * We are allocating at the beginning.
310                                  */
311                                 s->r_start = rv->r_end + 1;
312                                 CIRCLEQ_INSERT_BEFORE(&rm->rm_list, s, rv,
313                                                       r_link);
314                         } else {
315 #ifdef RMAN_DEBUG
316                                 printf("allocating at the end\n");
317 #endif /* RMAN_DEBUG */
318                                 /*
319                                  * We are allocating at the end.
320                                  */
321                                 s->r_end = rv->r_start - 1;
322                                 CIRCLEQ_INSERT_AFTER(&rm->rm_list, s, rv,
323                                                      r_link);
324                         }
325                         goto out;
326                 }
327         }
328
329         /*
330          * Now find an acceptable shared region, if the client's requirements
331          * allow sharing.  By our implementation restriction, a candidate
332          * region must match exactly by both size and sharing type in order
333          * to be considered compatible with the client's request.  (The
334          * former restriction could probably be lifted without too much
335          * additional work, but this does not seem warranted.)
336          */
337 #ifdef RMAN_DEBUG
338         printf("no unshared regions found\n");
339 #endif /* RMAN_DEBUG */
340         if ((flags & (RF_SHAREABLE | RF_TIMESHARE)) == 0)
341                 goto out;
342
343         for (s = r; !CIRCLEQ_TERMCOND(s, rm->rm_list);
344              s = CIRCLEQ_NEXT(s, r_link)) {
345                 if (s->r_start > end)
346                         break;
347                 if ((s->r_flags & flags) != flags)
348                         continue;
349                 rstart = max(s->r_start, start);
350                 rend = min(s->r_end, max(start + count, end));
351                 if (s->r_start >= start && s->r_end <= end
352                     && (s->r_end - s->r_start + 1) == count) {
353                         rv = malloc(sizeof *rv, M_RMAN, M_NOWAIT);
354                         if (rv == 0)
355                                 goto out;
356                         bzero(rv, sizeof *rv);
357                         rv->r_start = s->r_start;
358                         rv->r_end = s->r_end;
359                         rv->r_flags = s->r_flags & 
360                                 (RF_ALLOCATED | RF_SHAREABLE | RF_TIMESHARE);
361                         rv->r_dev = dev;
362                         rv->r_rm = rm;
363                         if (s->r_sharehead == 0) {
364                                 s->r_sharehead = malloc(sizeof *s->r_sharehead,
365                                                         M_RMAN, M_NOWAIT);
366                                 if (s->r_sharehead == 0) {
367                                         free(rv, M_RMAN);
368                                         rv = 0;
369                                         goto out;
370                                 }
371                                 bzero(s->r_sharehead, sizeof *s->r_sharehead);
372                                 LIST_INIT(s->r_sharehead);
373                                 LIST_INSERT_HEAD(s->r_sharehead, s, 
374                                                  r_sharelink);
375                                 s->r_flags |= RF_FIRSTSHARE;
376                         }
377                         rv->r_sharehead = s->r_sharehead;
378                         LIST_INSERT_HEAD(s->r_sharehead, rv, r_sharelink);
379                         goto out;
380                 }
381         }
382
383         /*
384          * We couldn't find anything.
385          */
386 out:
387         /*
388          * If the user specified RF_ACTIVE in the initial flags,
389          * which is reflected in `want_activate', we attempt to atomically
390          * activate the resource.  If this fails, we release the resource
391          * and indicate overall failure.  (This behavior probably doesn't
392          * make sense for RF_TIMESHARE-type resources.)
393          */
394         if (rv && want_activate) {
395                 struct resource *whohas;
396                 if (int_rman_activate_resource(rm, rv, &whohas)) {
397                         int_rman_release_resource(rm, rv);
398                         rv = 0;
399                 }
400         }
401                         
402         simple_unlock(rm->rm_slock);
403         return (rv);
404 }
405
406 static int
407 int_rman_activate_resource(struct rman *rm, struct resource *r,
408                            struct resource **whohas)
409 {
410         struct resource *s;
411         int ok;
412
413         /*
414          * If we are not timesharing, then there is nothing much to do.
415          * If we already have the resource, then there is nothing at all to do.
416          * If we are not on a sharing list with anybody else, then there is
417          * little to do.
418          */
419         if ((r->r_flags & RF_TIMESHARE) == 0
420             || (r->r_flags & RF_ACTIVE) != 0
421             || r->r_sharehead == 0) {
422                 r->r_flags |= RF_ACTIVE;
423                 return 0;
424         }
425
426         ok = 1;
427         for (s = LIST_FIRST(r->r_sharehead); s && ok;
428              s = LIST_NEXT(s, r_sharelink)) {
429                 if ((s->r_flags & RF_ACTIVE) != 0) {
430                         ok = 0;
431                         *whohas = s;
432                 }
433         }
434         if (ok) {
435                 r->r_flags |= RF_ACTIVE;
436                 return 0;
437         }
438         return EBUSY;
439 }
440
441 int
442 rman_activate_resource(struct resource *r)
443 {
444         int rv;
445         struct resource *whohas;
446         struct rman *rm;
447
448         rm = r->r_rm;
449         simple_lock(rm->rm_slock);
450         rv = int_rman_activate_resource(rm, r, &whohas);
451         simple_unlock(rm->rm_slock);
452         return rv;
453 }
454
455 int
456 rman_await_resource(struct resource *r, int pri, int timo)
457 {
458         int     rv, s;
459         struct  resource *whohas;
460         struct  rman *rm;
461
462         rm = r->r_rm;
463         for (;;) {
464                 simple_lock(rm->rm_slock);
465                 rv = int_rman_activate_resource(rm, r, &whohas);
466                 if (rv != EBUSY)
467                         return (rv);    /* returns with simplelock */
468
469                 if (r->r_sharehead == 0)
470                         panic("rman_await_resource");
471                 /*
472                  * splhigh hopefully will prevent a race between
473                  * simple_unlock and tsleep where a process
474                  * could conceivably get in and release the resource
475                  * before we have a chance to sleep on it.
476                  */
477                 s = splhigh();
478                 whohas->r_flags |= RF_WANTED;
479                 simple_unlock(rm->rm_slock);
480                 rv = tsleep(r->r_sharehead, pri, "rmwait", timo);
481                 if (rv) {
482                         splx(s);
483                         return rv;
484                 }
485                 simple_lock(rm->rm_slock);
486                 splx(s);
487         }
488 }
489
490 static int
491 int_rman_deactivate_resource(struct resource *r)
492 {
493         struct  rman *rm;
494
495         rm = r->r_rm;
496         r->r_flags &= ~RF_ACTIVE;
497         if (r->r_flags & RF_WANTED) {
498                 r->r_flags &= ~RF_WANTED;
499                 wakeup(r->r_sharehead);
500         }
501         return 0;
502 }
503
504 int
505 rman_deactivate_resource(struct resource *r)
506 {
507         struct  rman *rm;
508
509         rm = r->r_rm;
510         simple_lock(rm->rm_slock);
511         int_rman_deactivate_resource(r);
512         simple_unlock(rm->rm_slock);
513         return 0;
514 }
515
516 static int
517 int_rman_release_resource(struct rman *rm, struct resource *r)
518 {
519         struct  resource *s, *t;
520
521         if (r->r_flags & RF_ACTIVE)
522                 int_rman_deactivate_resource(r);
523
524         /*
525          * Check for a sharing list first.  If there is one, then we don't
526          * have to think as hard.
527          */
528         if (r->r_sharehead) {
529                 /*
530                  * If a sharing list exists, then we know there are at
531                  * least two sharers.
532                  *
533                  * If we are in the main circleq, appoint someone else.
534                  */
535                 LIST_REMOVE(r, r_sharelink);
536                 s = LIST_FIRST(r->r_sharehead);
537                 if (r->r_flags & RF_FIRSTSHARE) {
538                         s->r_flags |= RF_FIRSTSHARE;
539                         CIRCLEQ_INSERT_BEFORE(&rm->rm_list, r, s, r_link);
540                         CIRCLEQ_REMOVE(&rm->rm_list, r, r_link);
541                 }
542
543                 /*
544                  * Make sure that the sharing list goes away completely
545                  * if the resource is no longer being shared at all.
546                  */
547                 if (LIST_NEXT(s, r_sharelink) == 0) {
548                         free(s->r_sharehead, M_RMAN);
549                         s->r_sharehead = 0;
550                         s->r_flags &= ~RF_FIRSTSHARE;
551                 }
552                 goto out;
553         }
554
555         /*
556          * Look at the adjacent resources in the list and see if our
557          * segment can be merged with any of them.
558          */
559         s = CIRCLEQ_PREV(r, r_link);
560         t = CIRCLEQ_NEXT(r, r_link);
561
562         if (s != (void *)&rm->rm_list && (s->r_flags & RF_ALLOCATED) == 0
563             && t != (void *)&rm->rm_list && (t->r_flags & RF_ALLOCATED) == 0) {
564                 /*
565                  * Merge all three segments.
566                  */
567                 s->r_end = t->r_end;
568                 CIRCLEQ_REMOVE(&rm->rm_list, r, r_link);
569                 CIRCLEQ_REMOVE(&rm->rm_list, t, r_link);
570                 free(t, M_RMAN);
571         } else if (s != (void *)&rm->rm_list
572                    && (s->r_flags & RF_ALLOCATED) == 0) {
573                 /*
574                  * Merge previous segment with ours.
575                  */
576                 s->r_end = r->r_end;
577                 CIRCLEQ_REMOVE(&rm->rm_list, r, r_link);
578         } else if (t != (void *)&rm->rm_list
579                    && (t->r_flags & RF_ALLOCATED) == 0) {
580                 /*
581                  * Merge next segment with ours.
582                  */
583                 t->r_start = r->r_start;
584                 CIRCLEQ_REMOVE(&rm->rm_list, r, r_link);
585         } else {
586                 /*
587                  * At this point, we know there is nothing we
588                  * can potentially merge with, because on each
589                  * side, there is either nothing there or what is
590                  * there is still allocated.  In that case, we don't
591                  * want to remove r from the list; we simply want to
592                  * change it to an unallocated region and return
593                  * without freeing anything.
594                  */
595                 r->r_flags &= ~RF_ALLOCATED;
596                 return 0;
597         }
598
599 out:
600         free(r, M_RMAN);
601         return 0;
602 }
603
604 int
605 rman_release_resource(struct resource *r)
606 {
607         int     rv;
608         struct  rman *rm = r->r_rm;
609
610         simple_lock(rm->rm_slock);
611         rv = int_rman_release_resource(rm, r);
612         simple_unlock(rm->rm_slock);
613         return (rv);
614 }
615
/*
 * Convert a size into resource alignment flags: compute
 * ceil(log2(size)) and wrap it with RF_ALIGNMENT_LOG2().  Sizes 0 and 1
 * both yield a log2 of 0.
 */
uint32_t
rman_make_alignment_flags(uint32_t size)
{
	int	i;

	/*
	 * Find the highest bit set, and add one if more than one bit
	 * set.  We're effectively computing the ceil(log2(size)) here.
	 *
	 * The scan starts at bit 31, the top bit of a uint32_t, and shifts
	 * an unsigned one: shifting by the full width of the type, or
	 * shifting a signed 1 into the sign bit, is undefined behaviour.
	 */
	for (i = 31; i > 0; i--)
		if (((uint32_t)1 << i) & size)
			break;
	if (~((uint32_t)1 << i) & size)
		i++;

	return(RF_ALIGNMENT_LOG2(i));
}