vm_object locking fixes (dragonfly.git: sys/vm/vm_object.c)
1 /*
2  * (MPSAFE)
3  *
4  * Copyright (c) 1991, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * The Mach Operating System project at Carnegie-Mellon University.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *      This product includes software developed by the University of
21  *      California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *      from: @(#)vm_object.c   8.5 (Berkeley) 3/22/94
39  *
40  *
41  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
42  * All rights reserved.
43  *
44  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
45  *
46  * Permission to use, copy, modify and distribute this software and
47  * its documentation is hereby granted, provided that both the copyright
48  * notice and this permission notice appear in all copies of the
49  * software, derivative works or modified versions, and any portions
50  * thereof, and that both notices appear in supporting documentation.
51  *
52  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
53  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
54  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
55  *
56  * Carnegie Mellon requests users of this software to return to
57  *
58  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
59  *  School of Computer Science
60  *  Carnegie Mellon University
61  *  Pittsburgh PA 15213-3890
62  *
63  * any improvements or extensions that they make and grant Carnegie the
64  * rights to redistribute these changes.
65  *
66  * $FreeBSD: src/sys/vm/vm_object.c,v 1.171.2.8 2003/05/26 19:17:56 alc Exp $
67  */
68
69 /*
70  *      Virtual memory object module.
71  */
72
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/proc.h>           /* for curproc, pageproc */
76 #include <sys/thread.h>
77 #include <sys/vnode.h>
78 #include <sys/vmmeter.h>
79 #include <sys/mman.h>
80 #include <sys/mount.h>
81 #include <sys/kernel.h>
82 #include <sys/sysctl.h>
83 #include <sys/refcount.h>
84
85 #include <vm/vm.h>
86 #include <vm/vm_param.h>
87 #include <vm/pmap.h>
88 #include <vm/vm_map.h>
89 #include <vm/vm_object.h>
90 #include <vm/vm_page.h>
91 #include <vm/vm_pageout.h>
92 #include <vm/vm_pager.h>
93 #include <vm/swap_pager.h>
94 #include <vm/vm_kern.h>
95 #include <vm/vm_extern.h>
96 #include <vm/vm_zone.h>
97
98 #define EASY_SCAN_FACTOR        8
99
100 static void     vm_object_qcollapse(vm_object_t object);
101 static int      vm_object_page_collect_flush(vm_object_t object, vm_page_t p,
102                                              int pagerflags);
103 static void     vm_object_lock_init(vm_object_t);
104 static void     vm_object_hold_wait(vm_object_t);
105
106
107 /*
108  *      Virtual memory objects maintain the actual data
109  *      associated with allocated virtual memory.  A given
110  *      page of memory exists within exactly one object.
111  *
112  *      An object is only deallocated when all "references"
113  *      are given up.  Only one "reference" to a given
114  *      region of an object should be writeable.
115  *
116  *      Associated with each object is a list of all resident
117  *      memory pages belonging to that object; this list is
118  *      maintained by the "vm_page" module, and locked by the object's
119  *      lock.
120  *
121  *      Each object also records a "pager" routine which is
122  *      used to retrieve (and store) pages to the proper backing
123  *      storage.  In addition, objects may be backed by other
124  *      objects from which they were virtual-copied.
125  *
126  *      The only items within the object structure which are
127  *      modified after time of creation are:
128  *              reference count         locked by object's lock
129  *              pager routine           locked by object's lock
130  *
131  */
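/*
 * Editor's note: a minimal lifecycle sketch of the interface described
 * above, using vm_object_allocate(), vm_object_reference() and
 * vm_object_deallocate() as defined later in this file.  The caller and
 * the one-page size are hypothetical; the block is illustration only and
 * is not part of the original source.
 */
#if 0
static void
example_object_lifecycle(void)
{
	vm_object_t obj;

	obj = vm_object_allocate(OBJT_DEFAULT, 1);	/* ref_count == 1 */
	vm_object_reference(obj);			/* ref_count == 2 */
	vm_object_deallocate(obj);			/* back to 1 */
	vm_object_deallocate(obj);			/* last ref, terminates */
}
#endif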
132
133 struct object_q vm_object_list;         /* locked by vmobj_token */
134 struct vm_object kernel_object;
135
136 static long vm_object_count;            /* locked by vmobj_token */
137 extern int vm_pageout_page_count;
138
139 static long object_collapses;
140 static long object_bypasses;
141 static int next_index;
142 static vm_zone_t obj_zone;
143 static struct vm_zone obj_zone_store;
144 #define VM_OBJECTS_INIT 256
145 static struct vm_object vm_objects_init[VM_OBJECTS_INIT];
146
147 /*
148  * Misc low level routines
149  */
150 static void
151 vm_object_lock_init(vm_object_t obj)
152 {
153 #if defined(DEBUG_LOCKS)
154         int i;
155
156         obj->debug_hold_bitmap = 0;
157         obj->debug_hold_ovfl = 0;
158         for (i = 0; i < VMOBJ_DEBUG_ARRAY_SIZE; i++) {
159                 obj->debug_hold_thrs[i] = NULL;
160                 obj->debug_hold_file[i] = NULL;
161                 obj->debug_hold_line[i] = 0;
162         }
163 #endif
164 }
165
166 void
167 vm_object_lock_swap(void)
168 {
169         lwkt_token_swap();
170 }
171
172 void
173 vm_object_lock(vm_object_t obj)
174 {
175         lwkt_getpooltoken(obj);
176 }
177
178 void
179 vm_object_unlock(vm_object_t obj)
180 {
181         lwkt_relpooltoken(obj);
182 }
183
184 static __inline void
185 vm_object_assert_held(vm_object_t obj)
186 {
187         ASSERT_LWKT_TOKEN_HELD(lwkt_token_pool_lookup(obj));
188 }
189
190 void
191 #ifndef DEBUG_LOCKS
192 vm_object_hold(vm_object_t obj)
193 #else
194 debugvm_object_hold(vm_object_t obj, char *file, int line)
195 #endif
196 {
197         if (obj == NULL)
198                 return;
199
200         /*
201          * Object must be held (object allocation is stable due to the caller's
202          * context, typically already holding the token on a parent object)
203          * prior to potentially blocking on the lock, otherwise the object
204          * can get ripped away from us.
205          */
206         refcount_acquire(&obj->hold_count);
207         vm_object_lock(obj);
208
209 #if defined(DEBUG_LOCKS)
210         int i;
211
212         i = ffs(~obj->debug_hold_bitmap) - 1;
213         if (i == -1) {
214                 kprintf("vm_object hold count > VMOBJ_DEBUG_ARRAY_SIZE\n");
215                 obj->debug_hold_ovfl = 1;
216         }
217
218         obj->debug_hold_bitmap |= (1 << i);
219         obj->debug_hold_thrs[i] = curthread;
220         obj->debug_hold_file[i] = file;
221         obj->debug_hold_line[i] = line;
222 #endif
223 }
224
225 void
226 vm_object_drop(vm_object_t obj)
227 {
228         if (obj == NULL)
229                 return;
230
231 #if defined(DEBUG_LOCKS)
232         int found = 0;
233         int i;
234
235         for (i = 0; i < VMOBJ_DEBUG_ARRAY_SIZE; i++) {
236                 if ((obj->debug_hold_bitmap & (1 << i)) &&
237                     (obj->debug_hold_thrs[i] == curthread)) {
238                         obj->debug_hold_bitmap &= ~(1 << i);
239                         obj->debug_hold_thrs[i] = NULL;
240                         obj->debug_hold_file[i] = NULL;
241                         obj->debug_hold_line[i] = 0;
242                         found = 1;
243                         break;
244                 }
245         }
246
247         if (found == 0 && obj->debug_hold_ovfl == 0)
248                 panic("vm_object: attempt to drop hold on non-self-held obj");
249 #endif
250
251         /*
252          * The lock is a pool token, keep holding it across potential
253          * wakeups to interlock the tsleep/wakeup.
254          */
255         if (refcount_release(&obj->hold_count))
256                 wakeup(obj);
257         vm_object_unlock(obj);
258 }
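/*
 * Editor's note: a hold/drop pairing sketch matching the comments above:
 * the hold_count reference keeps the object from being destroyed while
 * the caller blocks, and the drop may wake up vm_object_hold_wait().
 * The surrounding function is hypothetical, not part of the original file.
 */
#if 0
static void
example_inspect_object(vm_object_t obj)
{
	vm_object_hold(obj);	/* hold_count ref + pool token */
	/* ... possibly blocking work; obj remains stable here ... */
	vm_object_drop(obj);	/* releases the token, wakes any waiters */
}
#endif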
259
260 /*
261  * This can only be called while the caller holds the object
262  * with the OBJ_DEAD interlock.  Since there are no refs this
263  * is the only thing preventing an object destruction race.
264  */
265 static void
266 vm_object_hold_wait(vm_object_t obj)
267 {
268         vm_object_lock(obj);
269
270 #if defined(DEBUG_LOCKS)
271         int i;
272
273         for (i = 0; i < VMOBJ_DEBUG_ARRAY_SIZE; i++) {
274                 if ((obj->debug_hold_bitmap & (1 << i)) &&
275                     (obj->debug_hold_thrs[i] == curthread))  {
276                         kprintf("vm_object %p: self-hold at %s:%d\n", obj,
277                                 obj->debug_hold_file[i], obj->debug_hold_line[i]);
278                         panic("vm_object: self-hold in terminate or collapse");
279                 }
280         }
281 #endif
282
283         while (obj->hold_count)
284                 tsleep(obj, 0, "vmobjhld", 0);
285
286         vm_object_unlock(obj);
287 }
288
289
290 /*
291  * Initialize a freshly allocated object
292  *
293  * Used only by vm_object_allocate() and zinitna().
294  *
295  * No requirements.
296  */
297 void
298 _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
299 {
300         int incr;
301
302         RB_INIT(&object->rb_memq);
303         LIST_INIT(&object->shadow_head);
304
305         object->type = type;
306         object->size = size;
307         object->ref_count = 1;
308         object->hold_count = 0;
309         object->flags = 0;
310         if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
311                 vm_object_set_flag(object, OBJ_ONEMAPPING);
312         object->paging_in_progress = 0;
313         object->resident_page_count = 0;
314         object->agg_pv_list_count = 0;
315         object->shadow_count = 0;
316         object->pg_color = next_index;
317         if ( size > (PQ_L2_SIZE / 3 + PQ_PRIME1))
318                 incr = PQ_L2_SIZE / 3 + PQ_PRIME1;
319         else
320                 incr = size;
321         next_index = (next_index + incr) & PQ_L2_MASK;
322         object->handle = NULL;
323         object->backing_object = NULL;
324         object->backing_object_offset = (vm_ooffset_t) 0;
325
326         object->generation++;
327         object->swblock_count = 0;
328         RB_INIT(&object->swblock_root);
329         vm_object_lock_init(object);
330
331         lwkt_gettoken(&vmobj_token);
332         TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
333         vm_object_count++;
334         lwkt_reltoken(&vmobj_token);
335 }
336
337 /*
338  * Initialize the VM objects module.
339  *
340  * Called from the low level boot code only.
341  */
342 void
343 vm_object_init(void)
344 {
345         TAILQ_INIT(&vm_object_list);
346         
347         _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(KvaEnd),
348                             &kernel_object);
349
350         obj_zone = &obj_zone_store;
351         zbootinit(obj_zone, "VM OBJECT", sizeof (struct vm_object),
352                 vm_objects_init, VM_OBJECTS_INIT);
353 }
354
355 void
356 vm_object_init2(void)
357 {
358         zinitna(obj_zone, NULL, NULL, 0, 0, ZONE_PANICFAIL, 1);
359 }
360
361 /*
362  * Allocate and return a new object of the specified type and size.
363  *
364  * No requirements.
365  */
366 vm_object_t
367 vm_object_allocate(objtype_t type, vm_pindex_t size)
368 {
369         vm_object_t result;
370
371         result = (vm_object_t) zalloc(obj_zone);
372
373         _vm_object_allocate(type, size, result);
374
375         return (result);
376 }
377
378 /*
379  * Add an additional reference to a vm_object.
380  *
381  * Object passed by caller must be stable or caller must already
382  * hold vmobj_token to avoid races.
383  */
384 void
385 vm_object_reference(vm_object_t object)
386 {
387         lwkt_gettoken(&vmobj_token);
388         vm_object_hold(object);
389         vm_object_reference_locked(object);
390         vm_object_drop(object);
391         lwkt_reltoken(&vmobj_token);
392 }
393
394 void
395 vm_object_reference_locked(vm_object_t object)
396 {
397         if (object) {
398                 ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
399                 /*NOTYET*/
400                 /*ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));*/
401                 object->ref_count++;
402                 if (object->type == OBJT_VNODE) {
403                         vref(object->handle);
404                         /* XXX what if the vnode is being destroyed? */
405                 }
406         }
407 }
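/*
 * Editor's note: sketch of the locked-variant calling convention asserted
 * above, i.e. the caller already holds vmobj_token.  This mirrors what
 * vm_object_reference() itself does; the wrapper name is hypothetical.
 */
#if 0
static void
example_reference_locked(vm_object_t obj)
{
	lwkt_gettoken(&vmobj_token);
	vm_object_reference_locked(obj);	/* token held as required */
	lwkt_reltoken(&vmobj_token);
}
#endif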
408
409 /*
410  * Dereference an object and its underlying vnode.
411  *
412  * The caller must hold vmobj_token.
413  * The object must be locked but not held.  This function will eat the lock.
414  */
415 static void
416 vm_object_vndeallocate(vm_object_t object)
417 {
418         struct vnode *vp = (struct vnode *) object->handle;
419
420         KASSERT(object->type == OBJT_VNODE,
421             ("vm_object_vndeallocate: not a vnode object"));
422         KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
423         ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
424 #ifdef INVARIANTS
425         if (object->ref_count == 0) {
426                 vprint("vm_object_vndeallocate", vp);
427                 panic("vm_object_vndeallocate: bad object reference count");
428         }
429 #endif
430
431         object->ref_count--;
432         if (object->ref_count == 0)
433                 vclrflags(vp, VTEXT);
434         vm_object_unlock(object);
435         vrele(vp);
436 }
437
438 /*
439  * Release a reference to the specified object, gained either through a
440  * vm_object_allocate or a vm_object_reference call.  When all references
441  * are gone, storage associated with this object may be relinquished.
442  *
443  * The caller does not have to hold the object locked but must have control
444  * over the reference in question in order to guarantee that the object
445  * does not get ripped out from under us.
446  */
447 void
448 vm_object_deallocate(vm_object_t object)
449 {
450         lwkt_gettoken(&vmobj_token);
451         vm_object_deallocate_locked(object);
452         lwkt_reltoken(&vmobj_token);
453 }
454
455 void
456 vm_object_deallocate_locked(vm_object_t object)
457 {
458         vm_object_t temp;
459
460         ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
461
462         if (object)
463                 vm_object_lock(object);
464
465         while (object != NULL) {
466                 if (object->type == OBJT_VNODE) {
467                         vm_object_vndeallocate(object);
468                         /* vndeallocate ate the lock */
469                         break;
470                 }
471
472                 if (object->ref_count == 0) {
473                         panic("vm_object_deallocate: object deallocated "
474                               "too many times: %d", object->type);
475                 }
476                 if (object->ref_count > 2) {
477                         object->ref_count--;
478                         vm_object_unlock(object);
479                         break;
480                 }
481
482                 /*
483                  * We currently need the vm_token from this point on, and
484                  * we must recheck ref_count after acquiring it.
485                  */
486                 lwkt_gettoken(&vm_token);
487
488                 if (object->ref_count > 2) {
489                         object->ref_count--;
490                         lwkt_reltoken(&vm_token);
491                         vm_object_unlock(object);
492                         break;
493                 }
494
495                 /*
496                  * Here on ref_count of one or two, which are special cases for
497                  * objects.
498                  *
499                  * Nominal ref_count > 1 case if the second ref is not from
500                  * a shadow.
501                  */
502                 if (object->ref_count == 2 && object->shadow_count == 0) {
503                         vm_object_set_flag(object, OBJ_ONEMAPPING);
504                         object->ref_count--;
505                         lwkt_reltoken(&vm_token);
506                         vm_object_unlock(object);
507                         break;
508                 }
509
510                 /*
511                  * If the second ref is from a shadow we chain along it
512          * if the object's handle is exhausted (i.e. NULL).
513                  *
514                  * We have to decrement object->ref_count before potentially
515                  * collapsing the first shadow object or the collapse code
516                  * will not be able to handle the degenerate case.
517                  */
518                 if (object->ref_count == 2 && object->shadow_count == 1) {
519                         object->ref_count--;
520                         if (object->handle == NULL &&
521                             (object->type == OBJT_DEFAULT ||
522                              object->type == OBJT_SWAP)) {
523                                 temp = LIST_FIRST(&object->shadow_head);
524                                 KASSERT(temp != NULL,
525                                         ("vm_object_deallocate: ref_count: "
526                                         "%d, shadow_count: %d",
527                                         object->ref_count,
528                                         object->shadow_count));
529                                 lwkt_reltoken(&vm_token);
530                                 vm_object_lock(temp);
531
532                                 if ((temp->handle == NULL) &&
533                                     (temp->type == OBJT_DEFAULT ||
534                                      temp->type == OBJT_SWAP)) {
535                                         /*
536                                          * Special case, must handle ref_count
537                                          * manually to avoid recursion.
538                                          */
539                                         temp->ref_count++;
540                                         vm_object_lock_swap();
541
542                                         while (
543                                                 temp->paging_in_progress ||
544                                                 object->paging_in_progress
545                                         ) {
546                                                 vm_object_pip_wait(temp,
547                                                                    "objde1");
548                                                 vm_object_pip_wait(object,
549                                                                    "objde2");
550                                         }
551
552                                         if (temp->ref_count == 1) {
553                                                 vm_object_unlock(object);
554                                                 object = temp;
555                                                 goto doterm;
556                                         }
557
558                                         lwkt_gettoken(&vm_token);
559                                         vm_object_collapse(temp);
560                                         lwkt_reltoken(&vm_token);
561                                         vm_object_unlock(object);
562                                         object = temp;
563                                         continue;
564                                 }
565                                 vm_object_unlock(temp);
566                         } else {
567                                 lwkt_reltoken(&vm_token);
568                         }
569                         vm_object_unlock(object);
570                         break;
571                 }
572
573                 /*
574                  * Normal dereferencing path
575                  */
576                 if (object->ref_count >= 2) {
577                         object->ref_count--;
578                         lwkt_reltoken(&vm_token);
579                         vm_object_unlock(object);
580                         break;
581                 }
582
583                 /*
584                  * Termination path
585                  *
586                  * We may have to loop to resolve races if we block getting
587          * temp's lock.  If temp is non-NULL we have to swap the
588          * lock order so the original object lock is at the top
589                  * of the lock heap.
590                  *
591                  * object has 1 ref which we can't reduce until after we have
592                  * locked temp.
593                  */
594                 lwkt_reltoken(&vm_token);
595 doterm:
596                 while ((temp = object->backing_object) != NULL) {
597                         vm_object_lock(temp);
598                         if (temp == object->backing_object)
599                                 break;
600                         vm_object_unlock(temp);
601                 }
602
603                 /*
604                  * re-check
605                  */
606                 if (object->ref_count >= 2) {
607                         object->ref_count--;
608                         if (temp)
609                                 vm_object_unlock(temp);
610                         vm_object_unlock(object);
611                         break;
612                 }
613                 KKASSERT(object->ref_count == 1);
614                 object->ref_count = 0;
615
616                 if (temp) {
617                         LIST_REMOVE(object, shadow_list);
618                         temp->shadow_count--;
619                         temp->generation++;
620                         object->backing_object = NULL;
621                         vm_object_lock_swap();
622                 }
623
624                 /*
625                  * Don't double-terminate, we could be in a termination
626                  * recursion due to the terminate having to sync data
627                  * to disk.
628                  */
629                 if ((object->flags & OBJ_DEAD) == 0) {
630                         vm_object_terminate(object);
631                         /* termination ate the object lock */
632                 } else {
633                         vm_object_unlock(object);
634                 }
635                 object = temp;
636         }
637 }
638
639 /*
640  * Destroy the specified object, freeing up related resources.
641  *
642  * The object must have zero references.
643  *
644  * The caller must be holding vmobj_token and properly interlock with
645  * OBJ_DEAD (at the moment).
646  *
647  * The caller must have locked the object only, and not be holding it.
648  * This function will eat the caller's lock on the object.
649  */
650 static int vm_object_terminate_callback(vm_page_t p, void *data);
651
652 void
653 vm_object_terminate(vm_object_t object)
654 {
655         /*
656          * Make sure no one uses us.  Once we set OBJ_DEAD we should be
657          * able to safely block.
658          */
659         KKASSERT((object->flags & OBJ_DEAD) == 0);
660         ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
661         vm_object_set_flag(object, OBJ_DEAD);
662
663         /*
664          * Wait for the pageout daemon to be done with the object
665          */
666         vm_object_pip_wait(object, "objtrm1");
667
668         KASSERT(!object->paging_in_progress,
669                 ("vm_object_terminate: pageout in progress"));
670
671         /*
672          * Clean and free the pages, as appropriate. All references to the
673          * object are gone, so we don't need to lock it.
674          */
675         if (object->type == OBJT_VNODE) {
676                 struct vnode *vp;
677
678                 /*
679                  * Clean pages and flush buffers.
680                  */
681                 vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
682
683                 vp = (struct vnode *) object->handle;
684                 vinvalbuf(vp, V_SAVE, 0, 0);
685         }
686
687         /*
688          * Wait for any I/O to complete, after which there had better not
689          * be any references left on the object.
690          */
691         vm_object_pip_wait(object, "objtrm2");
692
693         if (object->ref_count != 0) {
694                 panic("vm_object_terminate: object with references, "
695                       "ref_count=%d", object->ref_count);
696         }
697
698         /*
699          * Now free any remaining pages. For internal objects, this also
700          * removes them from paging queues. Don't free wired pages, just
701          * remove them from the object. 
702          */
703         lwkt_gettoken(&vm_token);
704         vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
705                                 vm_object_terminate_callback, NULL);
706         lwkt_reltoken(&vm_token);
707
708         /*
709          * Let the pager know object is dead.
710          */
711         vm_pager_deallocate(object);
712
713         /*
714          * Wait for the object hold count to hit zero, clean out pages as
715          * we go.
716          */
717         lwkt_gettoken(&vm_token);
718         for (;;) {
719                 vm_object_hold_wait(object);
720                 if (RB_ROOT(&object->rb_memq) == NULL)
721                         break;
722                 kprintf("vm_object_terminate: Warning, object %p "
723                         "still has %d pages\n",
724                         object, object->resident_page_count);
725                 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
726                                         vm_object_terminate_callback, NULL);
727         }
728         lwkt_reltoken(&vm_token);
729
730         /*
731          * There had better not be any pages left
732          */
733         KKASSERT(object->resident_page_count == 0);
734
735         /*
736          * Remove the object from the global object list.
737          *
738          * (we are holding vmobj_token)
739          */
740         TAILQ_REMOVE(&vm_object_list, object, object_list);
741         vm_object_count--;
742         vm_object_dead_wakeup(object);
743         vm_object_unlock(object);
744
745         if (object->ref_count != 0) {
746                 panic("vm_object_terminate2: object with references, "
747                       "ref_count=%d", object->ref_count);
748         }
749
750         /*
751          * Free the space for the object.
752          */
753         zfree(obj_zone, object);
754 }
755
756 /*
757  * The caller must hold vm_token.
758  */
759 static int
760 vm_object_terminate_callback(vm_page_t p, void *data __unused)
761 {
762         if (p->busy || (p->flags & PG_BUSY))
763                 panic("vm_object_terminate: freeing busy page %p", p);
764         if (p->wire_count == 0) {
765                 vm_page_busy(p);
766                 vm_page_free(p);
767                 mycpu->gd_cnt.v_pfree++;
768         } else {
769                 if (p->queue != PQ_NONE)
770                         kprintf("vm_object_terminate: Warning: Encountered wired page %p on queue %d\n", p, p->queue);
771                 vm_page_busy(p);
772                 vm_page_remove(p);
773                 vm_page_wakeup(p);
774         }
775         return(0);
776 }
777
778 /*
779  * The object is dead but still has an object<->pager association.  Sleep
780  * and return.  The caller typically retests the association in a loop.
781  *
782  * Must be called with the vmobj_token held.
783  */
784 void
785 vm_object_dead_sleep(vm_object_t object, const char *wmesg)
786 {
787         ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
788         if (object->handle) {
789                 vm_object_set_flag(object, OBJ_DEADWNT);
790                 tsleep(object, 0, wmesg, 0);
791                 /* object may be invalid after this point */
792         }
793 }
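/*
 * Editor's note: the comment above says callers typically retest the
 * object<->pager association in a loop.  A sketch of that pattern for a
 * vnode-backed object, assuming vmobj_token is held; since the object can
 * be freed once disassociated, it is re-derived from the vnode on every
 * iteration.  The function name and wmesg are hypothetical.
 */
#if 0
static void
example_wait_for_disassociation(struct vnode *vp)
{
	vm_object_t obj;

	while ((obj = vp->v_object) != NULL && (obj->flags & OBJ_DEAD)) {
		vm_object_dead_sleep(obj, "exmpl");
		/* obj may be gone now; re-derive it from vp before retesting */
	}
}
#endif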
794
795 /*
796  * Wakeup anyone waiting for the object<->pager disassociation on
797  * a dead object.
798  *
799  * Must be called with the vmobj_token held.
800  */
801 void
802 vm_object_dead_wakeup(vm_object_t object)
803 {
804         ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
805         if (object->flags & OBJ_DEADWNT) {
806                 vm_object_clear_flag(object, OBJ_DEADWNT);
807                 wakeup(object);
808         }
809 }
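/*
 * Editor's note: the producer side of the handshake above.  A pager
 * finishing its teardown would clear the handle and then wake any threads
 * blocked in vm_object_dead_sleep().  The wrapper is hypothetical;
 * vmobj_token must already be held as asserted above.
 */
#if 0
static void
example_pager_disassociate(vm_object_t obj)
{
	obj->handle = NULL;		/* break the object<->pager link */
	vm_object_dead_wakeup(obj);	/* release any OBJ_DEADWNT sleepers */
}
#endif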
810
811 /*
812  * Clean all dirty pages in the specified range of the object.  Leaves pages
813  * on whatever queue they are currently on.  If NOSYNC is set then do not
814  * write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
815  * leaving the object dirty.
816  *
817  * When stuffing pages asynchronously, allow clustering.  XXX we need a
818  * synchronous clustering mode implementation.
819  *
820  * Odd semantics: if start == end, we clean everything.
821  *
822  * The object must be locked? XXX
823  */
824 static int vm_object_page_clean_pass1(struct vm_page *p, void *data);
825 static int vm_object_page_clean_pass2(struct vm_page *p, void *data);
826
827 void
828 vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
829                      int flags)
830 {
831         struct rb_vm_page_scan_info info;
832         struct vnode *vp;
833         int wholescan;
834         int pagerflags;
835         int curgeneration;
836
837         vm_object_hold(object);
838         if (object->type != OBJT_VNODE ||
839             (object->flags & OBJ_MIGHTBEDIRTY) == 0) {
840                 vm_object_drop(object);
841                 return;
842         }
843
844         pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? 
845                         VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
846         pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
847
848         vp = object->handle;
849
850         /*
851          * Interlock other major object operations.  This allows us to 
852          * temporarily clear OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY.
853          */
854         vm_object_set_flag(object, OBJ_CLEANING);
855
856         /*
857          * Handle 'entire object' case
858          */
859         info.start_pindex = start;
860         if (end == 0) {
861                 info.end_pindex = object->size - 1;
862         } else {
863                 info.end_pindex = end - 1;
864         }
865         wholescan = (start == 0 && info.end_pindex == object->size - 1);
866         info.limit = flags;
867         info.pagerflags = pagerflags;
868         info.object = object;
869
870         /*
871          * If cleaning the entire object do a pass to mark the pages read-only.
872          * If everything worked out ok, clear OBJ_WRITEABLE and
873          * OBJ_MIGHTBEDIRTY.
874          */
875         if (wholescan) {
876                 info.error = 0;
877                 lwkt_gettoken(&vm_token);
878                 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
879                                         vm_object_page_clean_pass1, &info);
880                 lwkt_reltoken(&vm_token);
881                 if (info.error == 0) {
882                         vm_object_clear_flag(object,
883                                              OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
884                         if (object->type == OBJT_VNODE &&
885                             (vp = (struct vnode *)object->handle) != NULL) {
886                                 if (vp->v_flag & VOBJDIRTY) 
887                                         vclrflags(vp, VOBJDIRTY);
888                         }
889                 }
890         }
891
892         /*
893          * Do a pass to clean all the dirty pages we find.
894          */
895         do {
896                 info.error = 0;
897                 curgeneration = object->generation;
898                 lwkt_gettoken(&vm_token);
899                 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
900                                         vm_object_page_clean_pass2, &info);
901                 lwkt_reltoken(&vm_token);
902         } while (info.error || curgeneration != object->generation);
903
904         vm_object_clear_flag(object, OBJ_CLEANING);
905         vm_object_drop(object);
906 }
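/*
 * Editor's note: usage sketch of the "start == end means clean everything"
 * convention documented above, mirroring the synchronous whole-object
 * flush done from vm_object_terminate().  The wrapper is hypothetical.
 */
#if 0
static void
example_sync_whole_object(vm_object_t obj)
{
	if (obj->type == OBJT_VNODE)
		vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
}
#endif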
907
908 /*
909  * The caller must hold vm_token.
910  */
911 static 
912 int
913 vm_object_page_clean_pass1(struct vm_page *p, void *data)
914 {
915         struct rb_vm_page_scan_info *info = data;
916
917         vm_page_flag_set(p, PG_CLEANCHK);
918         if ((info->limit & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
919                 info->error = 1;
920         else
921                 vm_page_protect(p, VM_PROT_READ);       /* must not block */
922         return(0);
923 }
924
925 /*
926  * The caller must hold vm_token.
927  */
928 static 
929 int
930 vm_object_page_clean_pass2(struct vm_page *p, void *data)
931 {
932         struct rb_vm_page_scan_info *info = data;
933         int n;
934
935         /*
936          * Do not mess with pages that were inserted after we started
937          * the cleaning pass.
938          */
939         if ((p->flags & PG_CLEANCHK) == 0)
940                 return(0);
941
942         /*
943          * Before wasting time traversing the pmaps, check for trivial
944          * cases where the page cannot be dirty.
945          */
946         if (p->valid == 0 || (p->queue - p->pc) == PQ_CACHE) {
947                 KKASSERT((p->dirty & p->valid) == 0);
948                 return(0);
949         }
950
951         /*
952          * Check whether the page is dirty or not.  The page has been set
953          * to be read-only so the check will not race a user dirtying the
954          * page.
955          */
956         vm_page_test_dirty(p);
957         if ((p->dirty & p->valid) == 0) {
958                 vm_page_flag_clear(p, PG_CLEANCHK);
959                 return(0);
960         }
961
962         /*
963          * If we have been asked to skip nosync pages and this is a
964          * nosync page, skip it.  Note that the object flags were
965          * not cleared in this case (because pass1 will have returned an
966          * error), so we do not have to set them.
967          */
968         if ((info->limit & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
969                 vm_page_flag_clear(p, PG_CLEANCHK);
970                 return(0);
971         }
972
973         /*
974          * Flush as many pages as we can.  PG_CLEANCHK will be cleared on
975          * the pages that get successfully flushed.  Set info->error if
976          * we raced an object modification.
977          */
978         n = vm_object_page_collect_flush(info->object, p, info->pagerflags);
979         if (n == 0)
980                 info->error = 1;
981         return(0);
982 }
983
984 /*
985  * Collect the specified page and nearby pages and flush them out.
986  * The number of pages flushed is returned.
987  *
988  * The caller must hold vm_token.
989  */
990 static int
991 vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags)
992 {
993         int runlen;
994         int maxf;
995         int chkb;
996         int maxb;
997         int i;
998         int curgeneration;
999         vm_pindex_t pi;
1000         vm_page_t maf[vm_pageout_page_count];
1001         vm_page_t mab[vm_pageout_page_count];
1002         vm_page_t ma[vm_pageout_page_count];
1003
1004         curgeneration = object->generation;
1005
1006         pi = p->pindex;
1007         while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
1008                 if (object->generation != curgeneration) {
1009                         return(0);
1010                 }
1011         }
1012         KKASSERT(p->object == object && p->pindex == pi);
1013
1014         maxf = 0;
1015         for(i = 1; i < vm_pageout_page_count; i++) {
1016                 vm_page_t tp;
1017
1018                 if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
1019                         if ((tp->flags & PG_BUSY) ||
1020                                 ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 && 
1021                                  (tp->flags & PG_CLEANCHK) == 0) ||
1022                                 (tp->busy != 0))
1023                                 break;
1024                         if((tp->queue - tp->pc) == PQ_CACHE) {
1025                                 vm_page_flag_clear(tp, PG_CLEANCHK);
1026                                 break;
1027                         }
1028                         vm_page_test_dirty(tp);
1029                         if ((tp->dirty & tp->valid) == 0) {
1030                                 vm_page_flag_clear(tp, PG_CLEANCHK);
1031                                 break;
1032                         }
1033                         maf[ i - 1 ] = tp;
1034                         maxf++;
1035                         continue;
1036                 }
1037                 break;
1038         }
1039
1040         maxb = 0;
1041         chkb = vm_pageout_page_count -  maxf;
1042         if (chkb) {
1043                 for(i = 1; i < chkb;i++) {
1044                         vm_page_t tp;
1045
1046                         if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
1047                                 if ((tp->flags & PG_BUSY) ||
1048                                         ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 && 
1049                                          (tp->flags & PG_CLEANCHK) == 0) ||
1050                                         (tp->busy != 0))
1051                                         break;
1052                                 if((tp->queue - tp->pc) == PQ_CACHE) {
1053                                         vm_page_flag_clear(tp, PG_CLEANCHK);
1054                                         break;
1055                                 }
1056                                 vm_page_test_dirty(tp);
1057                                 if ((tp->dirty & tp->valid) == 0) {
1058                                         vm_page_flag_clear(tp, PG_CLEANCHK);
1059                                         break;
1060                                 }
1061                                 mab[ i - 1 ] = tp;
1062                                 maxb++;
1063                                 continue;
1064                         }
1065                         break;
1066                 }
1067         }
1068
1069         for(i = 0; i < maxb; i++) {
1070                 int index = (maxb - i) - 1;
1071                 ma[index] = mab[i];
1072                 vm_page_flag_clear(ma[index], PG_CLEANCHK);
1073         }
1074         vm_page_flag_clear(p, PG_CLEANCHK);
1075         ma[maxb] = p;
1076         for(i = 0; i < maxf; i++) {
1077                 int index = (maxb + i) + 1;
1078                 ma[index] = maf[i];
1079                 vm_page_flag_clear(ma[index], PG_CLEANCHK);
1080         }
1081         runlen = maxb + maxf + 1;
1082
1083         vm_pageout_flush(ma, runlen, pagerflags);
1084         for (i = 0; i < runlen; i++) {
1085                 if (ma[i]->valid & ma[i]->dirty) {
1086                         vm_page_protect(ma[i], VM_PROT_READ);
1087                         vm_page_flag_set(ma[i], PG_CLEANCHK);
1088
1089                         /*
1090                          * maxf will end up being the actual number of pages
1091                          * we wrote out contiguously, non-inclusive of the
1092                          * first page.  We do not count look-behind pages.
1093                          */
1094                         if (i >= maxb + 1 && (maxf > i - maxb - 1))
1095                                 maxf = i - maxb - 1;
1096                 }
1097         }
1098         return(maxf + 1);
1099 }
1100
1101 /*
1102  * Same as vm_object_pmap_copy, except range checking really
1103  * works, and is meant for small sections of an object.
1104  *
1105  * This code protects resident pages by making them read-only
1106  * and is typically called on a fork or split when a page
1107  * is converted to copy-on-write.  
1108  *
1109  * NOTE: If the page is already at VM_PROT_NONE, calling
1110  * vm_page_protect will have no effect.
1111  */
1112 void
1113 vm_object_pmap_copy_1(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
1114 {
1115         vm_pindex_t idx;
1116         vm_page_t p;
1117
1118         if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0)
1119                 return;
1120
1121         /*
1122          * spl protection needed to prevent races between the lookup,
1123          * an interrupt unbusy/free, and our protect call.
1124          */
1125         lwkt_gettoken(&vm_token);
1126         for (idx = start; idx < end; idx++) {
1127                 p = vm_page_lookup(object, idx);
1128                 if (p == NULL)
1129                         continue;
1130                 vm_page_protect(p, VM_PROT_READ);
1131         }
1132         lwkt_reltoken(&vm_token);
1133 }
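/*
 * Editor's note: sketch of the fork/split situation described above,
 * write-protecting a small pindex range so that subsequent writes fault
 * and can be resolved copy-on-write.  The range arguments and wrapper are
 * hypothetical.
 */
#if 0
static void
example_cow_protect(vm_object_t obj, vm_pindex_t first, vm_pindex_t npages)
{
	vm_object_pmap_copy_1(obj, first, first + npages);
}
#endif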
1134
1135 /*
1136  * Removes all physical pages in the specified object range from all
1137  * physical maps.
1138  *
1139  * The object must *not* be locked.
1140  */
1141
1142 static int vm_object_pmap_remove_callback(vm_page_t p, void *data);
1143
1144 void
1145 vm_object_pmap_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
1146 {
1147         struct rb_vm_page_scan_info info;
1148
1149         if (object == NULL)
1150                 return;
1151         info.start_pindex = start;
1152         info.end_pindex = end - 1;
1153
1154         lwkt_gettoken(&vm_token);
1155         vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
1156                                 vm_object_pmap_remove_callback, &info);
1157         if (start == 0 && end == object->size)
1158                 vm_object_clear_flag(object, OBJ_WRITEABLE);
1159         lwkt_reltoken(&vm_token);
1160 }
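/*
 * Editor's note: companion sketch to the one after vm_object_pmap_copy_1():
 * removing all pmap entries for an entire object, which also lets the
 * routine clear OBJ_WRITEABLE as its code notes.  The wrapper is
 * hypothetical.
 */
#if 0
static void
example_unmap_whole_object(vm_object_t obj)
{
	vm_object_pmap_remove(obj, 0, obj->size);
}
#endif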
1161
1162 /*
1163  * The caller must hold vm_token.
1164  */
1165 static int
1166 vm_object_pmap_remove_callback(vm_page_t p, void *data __unused)
1167 {
1168         vm_page_protect(p, VM_PROT_NONE);
1169         return(0);
1170 }
1171
1172 /*
1173  * Implements the madvise function at the object/page level.
1174  *
1175  * MADV_WILLNEED        (any object)
1176  *
1177  *      Activate the specified pages if they are resident.
1178  *
1179  * MADV_DONTNEED        (any object)
1180  *
1181  *      Deactivate the specified pages if they are resident.
1182  *
1183  * MADV_FREE    (OBJT_DEFAULT/OBJT_SWAP objects, OBJ_ONEMAPPING only)
1184  *
1185  *      Deactivate and clean the specified pages if they are
1186  *      resident.  This permits the process to reuse the pages
1187  *      without faulting or the kernel to reclaim the pages
1188  *      without I/O.
1189  *
1190  * No requirements.
1191  */
1192 void
1193 vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
1194 {
1195         vm_pindex_t end, tpindex;
1196         vm_object_t tobject;
1197         vm_page_t m;
1198
1199         if (object == NULL)
1200                 return;
1201
1202         end = pindex + count;
1203
1204         lwkt_gettoken(&vm_token);
1205
1206         /*
1207          * Locate and adjust resident pages
1208          */
1209         for (; pindex < end; pindex += 1) {
1210 relookup:
1211                 tobject = object;
1212                 tpindex = pindex;
1213 shadowlookup:
1214                 /*
1215                  * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
1216                  * and those pages must be OBJ_ONEMAPPING.
1217                  */
1218                 if (advise == MADV_FREE) {
1219                         if ((tobject->type != OBJT_DEFAULT &&
1220                              tobject->type != OBJT_SWAP) ||
1221                             (tobject->flags & OBJ_ONEMAPPING) == 0) {
1222                                 continue;
1223                         }
1224                 }
1225
1226                 /*
1227                  * spl protection is required to avoid a race between the
1228                  * lookup, an interrupt unbusy/free, and our busy check.
1229                  */
1230
1231                 m = vm_page_lookup(tobject, tpindex);
1232
1233                 if (m == NULL) {
1234                         /*
1235                          * There may be swap even if there is no backing page
1236                          */
1237                         if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1238                                 swap_pager_freespace(tobject, tpindex, 1);
1239
1240                         /*
1241                          * next object
1242                          */
1243                         if (tobject->backing_object == NULL)
1244                                 continue;
1245                         tpindex += OFF_TO_IDX(tobject->backing_object_offset);
1246                         tobject = tobject->backing_object;
1247                         goto shadowlookup;
1248                 }
1249
1250                 /*
1251                  * If the page is busy or not in a normal active state,
1252                  * we skip it.  If the page is not managed there are no
1253                  * page queues to mess with.  Things can break if we mess
1254                  * with pages in any of the below states.
1255                  */
1256                 if (
1257                     m->hold_count ||
1258                     m->wire_count ||
1259                     (m->flags & PG_UNMANAGED) ||
1260                     m->valid != VM_PAGE_BITS_ALL
1261                 ) {
1262                         continue;
1263                 }
1264
1265                 if (vm_page_sleep_busy(m, TRUE, "madvpo")) {
1266                         goto relookup;
1267                 }
1268                 vm_page_busy(m);
1269
1270                 /*
1271                  * Theoretically once a page is known not to be busy, an
1272                  * interrupt cannot come along and rip it out from under us.
1273                  */
1274
1275                 if (advise == MADV_WILLNEED) {
1276                         vm_page_activate(m);
1277                 } else if (advise == MADV_DONTNEED) {
1278                         vm_page_dontneed(m);
1279                 } else if (advise == MADV_FREE) {
1280                         /*
1281                          * Mark the page clean.  This will allow the page
1282                          * to be freed up by the system.  However, such pages
1283                          * are often reused quickly by malloc()/free()
1284                          * so we do not do anything that would cause
1285                          * a page fault if we can help it.
1286                          *
1287                          * Specifically, we do not try to actually free
1288                          * the page now nor do we try to put it in the
1289                          * cache (which would cause a page fault on reuse).
1290                          *
1291                          * But we do make the page as freeable as we
1292                          * can without actually taking the step of unmapping
1293                          * it.
1294                          */
1295                         pmap_clear_modify(m);
1296                         m->dirty = 0;
1297                         m->act_count = 0;
1298                         vm_page_dontneed(m);
1299                         if (tobject->type == OBJT_SWAP)
1300                                 swap_pager_freespace(tobject, tpindex, 1);
1301                 }
1302                 vm_page_wakeup(m);
1303         }       
1304         lwkt_reltoken(&vm_token);
1305 }
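/*
 * Editor's note: vm_object_madvise() takes a pindex/count pair rather than
 * a byte range.  A sketch of converting a byte offset/length the way an
 * madvise(MADV_FREE) path might; the wrapper and its arguments are
 * hypothetical.
 */
#if 0
static void
example_object_madv_free(vm_object_t obj, vm_ooffset_t offset, vm_size_t size)
{
	vm_pindex_t pindex = OFF_TO_IDX(offset);
	int count = OFF_TO_IDX(size + PAGE_MASK);

	vm_object_madvise(obj, pindex, count, MADV_FREE);
}
#endif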
1306
1307 /*
1308  * Create a new object which is backed by the specified existing object
1309  * range.  The source object reference is deallocated.
1310  *
1311  * The new object and offset into that object are returned in the source
1312  * parameters.
1313  *
1314  * No other requirements.
1315  */
1316 void
1317 vm_object_shadow(vm_object_t *object, vm_ooffset_t *offset, vm_size_t length)
1318 {
1319         vm_object_t source;
1320         vm_object_t result;
1321
1322         source = *object;
1323
1324         if (source)
1325                 vm_object_hold(source);
1326
1327         /*
1328          * Don't create the new object if the old object isn't shared.
1329          */
1330         lwkt_gettoken(&vm_token);
1331
1332         if (source != NULL &&
1333             source->ref_count == 1 &&
1334             source->handle == NULL &&
1335             (source->type == OBJT_DEFAULT ||
1336              source->type == OBJT_SWAP)) {
1337                 lwkt_reltoken(&vm_token);
1338                 vm_object_drop(source);
1339                 return;
1340         }
1341
1342         /*
1343          * Allocate a new object with the given length
1344          */
1345
1346         if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL)
1347                 panic("vm_object_shadow: no object for shadowing");
1348
1349         /*
1350          * The new object shadows the source object, adding a reference to it.
1351          * Our caller changes his reference to point to the new object,
1352          * removing a reference to the source object.  Net result: no change
1353          * of reference count.
1354          *
1355          * Try to optimize the result object's page color when shadowing
1356          * in order to maintain page coloring consistency in the combined 
1357          * shadowed object.
1358          */
1359         result->backing_object = source;
1360         if (source) {
1361                 LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
1362                 source->shadow_count++;
1363                 source->generation++;
1364                 result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & PQ_L2_MASK;
1365         }
1366
1367         /*
1368          * Store the offset into the source object, and fix up the offset into
1369          * the new object.
1370          */
1371         result->backing_object_offset = *offset;
1372         lwkt_reltoken(&vm_token);
1373
1374         if (source)
1375                 vm_object_drop(source);
1376
1377         /*
1378          * Return the new things
1379          */
1380         *offset = 0;
1381         *object = result;
1382 }
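/*
 * Editor's note: sketch of the in/out calling convention described above.
 * When a new shadow object is created, *objp is replaced with the new
 * object and *offsetp is reset to 0; if the source was unshared, both are
 * left untouched.  The wrapper is hypothetical.
 */
#if 0
static void
example_shadow_entry(vm_object_t *objp, vm_ooffset_t *offsetp, vm_size_t len)
{
	vm_object_shadow(objp, offsetp, len);
	/* objp and offsetp now describe the object/offset to map */
}
#endif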
1383
1384 #define OBSC_TEST_ALL_SHADOWED  0x0001
1385 #define OBSC_COLLAPSE_NOWAIT    0x0002
1386 #define OBSC_COLLAPSE_WAIT      0x0004
1387
1388 static int vm_object_backing_scan_callback(vm_page_t p, void *data);
1389
1390 /*
1391  * The caller must hold vm_token.
1392  */
1393 static __inline int
1394 vm_object_backing_scan(vm_object_t object, int op)
1395 {
1396         struct rb_vm_page_scan_info info;
1397         vm_object_t backing_object;
1398
1399         backing_object = object->backing_object;
1400         info.backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
1401
1402         /*
1403          * Initial conditions
1404          */
1405
1406         if (op & OBSC_TEST_ALL_SHADOWED) {
1407                 /*
1408                  * We do not want to have to test for the existence of
1409                  * swap pages in the backing object.  XXX but with the
1410                  * new swapper this would be pretty easy to do.
1411                  *
1412                  * XXX what about anonymous MAP_SHARED memory that hasn't
1413                  * been ZFOD faulted yet?  If we do not test for this, the
1414                  * shadow test may succeed! XXX
1415                  */
1416                 if (backing_object->type != OBJT_DEFAULT) {
1417                         return(0);
1418                 }
1419         }
1420         if (op & OBSC_COLLAPSE_WAIT) {
1421                 KKASSERT((backing_object->flags & OBJ_DEAD) == 0);
1422                 vm_object_set_flag(backing_object, OBJ_DEAD);
1423         }
1424
1425         /*
1426          * Our scan.  We have to retry if a negative error code is returned,
1427          * otherwise 0 or 1 will be returned in info.error.  0 indicates that
1428          * the scan had to be stopped because the parent does not completely
1429          * shadow the child.
1430          */
1431         info.object = object;
1432         info.backing_object = backing_object;
1433         info.limit = op;
1434         do {
1435                 info.error = 1;
1436                 vm_page_rb_tree_RB_SCAN(&backing_object->rb_memq, NULL,
1437                                         vm_object_backing_scan_callback,
1438                                         &info);
1439         } while (info.error < 0);
1440
1441         return(info.error);
1442 }
1443
1444 /*
1445  * The caller must hold vm_token.
1446  */
1447 static int
1448 vm_object_backing_scan_callback(vm_page_t p, void *data)
1449 {
1450         struct rb_vm_page_scan_info *info = data;
1451         vm_object_t backing_object;
1452         vm_object_t object;
1453         vm_pindex_t new_pindex;
1454         vm_pindex_t backing_offset_index;
1455         int op;
1456
1457         new_pindex = p->pindex - info->backing_offset_index;
1458         op = info->limit;
1459         object = info->object;
1460         backing_object = info->backing_object;
1461         backing_offset_index = info->backing_offset_index;
1462
1463         if (op & OBSC_TEST_ALL_SHADOWED) {
1464                 vm_page_t pp;
1465
1466                 /*
1467                  * Ignore pages outside the parent object's range
1468                  * and outside the parent object's mapping of the 
1469                  * backing object.
1470                  *
1471                  * note that we do not busy the backing object's
1472                  * page.
1473                  */
1474                 if (
1475                     p->pindex < backing_offset_index ||
1476                     new_pindex >= object->size
1477                 ) {
1478                         return(0);
1479                 }
1480
1481                 /*
1482                  * See if the parent has the page or if the parent's
1483                  * object pager has the page.  If the parent has the
1484                  * page but the page is not valid, the parent's
1485                  * object pager must have the page.
1486                  *
1487                  * If this fails, the parent does not completely shadow
1488                  * the object and we might as well give up now.
1489                  */
1490
1491                 pp = vm_page_lookup(object, new_pindex);
1492                 if ((pp == NULL || pp->valid == 0) &&
1493                     !vm_pager_has_page(object, new_pindex)
1494                 ) {
1495                         info->error = 0;        /* problemo */
1496                         return(-1);             /* stop the scan */
1497                 }
1498         }
1499
1500         /*
1501          * Check for busy page
1502          */
1503
1504         if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
1505                 vm_page_t pp;
1506
1507                 if (op & OBSC_COLLAPSE_NOWAIT) {
1508                         if (
1509                             (p->flags & PG_BUSY) ||
1510                             !p->valid || 
1511                             p->hold_count || 
1512                             p->wire_count ||
1513                             p->busy
1514                         ) {
1515                                 return(0);
1516                         }
1517                 } else if (op & OBSC_COLLAPSE_WAIT) {
1518                         if (vm_page_sleep_busy(p, TRUE, "vmocol")) {
1519                                 /*
1520                                  * If we slept, anything could have
1521                                  * happened.   Ask that the scan be restarted.
1522                                  *
1523                                  * Since the object is marked dead, the
1524                                  * backing offset should not have changed.  
1525                                  */
1526                                 info->error = -1;
1527                                 return(-1);
1528                         }
1529                 }
1530
1531                 /* 
1532                  * Busy the page
1533                  */
1534                 vm_page_busy(p);
1535
1536                 KASSERT(
1537                     p->object == backing_object,
1538                     ("vm_object_qcollapse(): object mismatch")
1539                 );
1540
1541                 /*
1542                  * Destroy any associated swap
1543                  */
1544                 if (backing_object->type == OBJT_SWAP)
1545                         swap_pager_freespace(backing_object, p->pindex, 1);
1546
1547                 if (
1548                     p->pindex < backing_offset_index ||
1549                     new_pindex >= object->size
1550                 ) {
1551                         /*
1552                          * Page is out of the parent object's range; we
1553                          * can simply destroy it.
1554                          */
1555                         vm_page_protect(p, VM_PROT_NONE);
1556                         vm_page_free(p);
1557                         return(0);
1558                 }
1559
1560                 pp = vm_page_lookup(object, new_pindex);
1561                 if (pp != NULL || vm_pager_has_page(object, new_pindex)) {
1562                         /*
1563                          * The page already exists in the parent OR swap
1564                          * exists for this location in the parent.  Destroy
1565                          * the original page from the backing object.
1566                          *
1567                          * Leave the parent's page alone.
1568                          */
1569                         vm_page_protect(p, VM_PROT_NONE);
1570                         vm_page_free(p);
1571                         return(0);
1572                 }
1573
1574                 /*
1575                  * Page does not exist in parent, rename the
1576                  * page from the backing object to the main object. 
1577                  *
1578                  * If the page was mapped to a process, it can remain 
1579                  * mapped through the rename.
1580                  */
1581                 if ((p->queue - p->pc) == PQ_CACHE)
1582                         vm_page_deactivate(p);
1583
1584                 vm_page_rename(p, object, new_pindex);
1585                 /* page automatically made dirty by rename */
1586         }
1587         return(0);
1588 }
1589
1590 /*
1591  * This version of collapse allows the operation to occur earlier and
1592  * while paging_in_progress is true for an object.  This is not a complete
1593  * operation, but it should plug 99.9% of the remaining leaks.
1594  *
1595  * The caller must hold vm_token and vmobj_token.
1596  * (only called from vm_object_collapse)
1597  */
1598 static void
1599 vm_object_qcollapse(vm_object_t object)
1600 {
1601         vm_object_t backing_object = object->backing_object;
1602
1603         if (backing_object->ref_count != 1)
1604                 return;
1605
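        /*
         * Temporarily bump the reference count across the scan, presumably
         * so that other code paths cannot free or collapse the backing
         * object out from under us while its pages are being freed or
         * renamed into the parent.
         */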
1606         backing_object->ref_count += 2;
1607
1608         vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
1609
1610         backing_object->ref_count -= 2;
1611 }
1612
1613 /*
1614  * Collapse an object with the object backing it.  Pages in the backing
1615  * object are moved into the parent, and the backing object is deallocated.
1616  *
1617  * The caller must hold (object).
1618  */
1619 void
1620 vm_object_collapse(vm_object_t object)
1621 {
1622         ASSERT_LWKT_TOKEN_HELD(&vm_token);
1623         ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
1624         vm_object_assert_held(object);
1625
1626         while (TRUE) {
1627                 vm_object_t backing_object;
1628
1629                 /*
1630                  * Verify that the conditions are right for collapse:
1631                  *
1632                  * The object exists and the backing object exists.
1633                  */
1634                 if (object == NULL)
1635                         break;
1636
1637                 if ((backing_object = object->backing_object) == NULL)
1638                         break;
1639
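                /*
                 * Acquire a hold on the backing object, then re-check that
                 * it is still our backing object.  If the pointer changed
                 * while we blocked obtaining the hold, drop it and retry
                 * the loop.
                 */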
1640                 vm_object_hold(backing_object);
1641                 if (backing_object != object->backing_object) {
1642                         vm_object_drop(backing_object);
1643                         continue;
1644                 }
1645
1646                 /*
1647                  * We check the backing object first because it is the one
1648                  * most likely not to be collapsible.
1649                  */
1650                 if (backing_object->handle != NULL ||
1651                     (backing_object->type != OBJT_DEFAULT &&
1652                      backing_object->type != OBJT_SWAP) ||
1653                     (backing_object->flags & OBJ_DEAD) ||
1654                     object->handle != NULL ||
1655                     (object->type != OBJT_DEFAULT &&
1656                      object->type != OBJT_SWAP) ||
1657                     (object->flags & OBJ_DEAD)) {
1658                         vm_object_drop(backing_object);
1659                         break;
1660                 }
1661
1662                 if (
1663                     object->paging_in_progress != 0 ||
1664                     backing_object->paging_in_progress != 0
1665                 ) {
1666                         vm_object_drop(backing_object);
1667                         vm_object_qcollapse(object);
1668                         break;
1669                 }
1670
1671                 /*
1672                  * We know that we can either collapse the backing object (if
1673                  * the parent is the only reference to it) or (perhaps) have
1674                  * the parent bypass the backing object if the parent shadows
1675                  * all the resident pages in the entire backing object.
1676                  *
1677                  * This is ignoring pager-backed pages such as swap pages.
1678                  * vm_object_backing_scan fails the shadowing test in this
1679                  * case.
1680                  */
1681
1682                 if (backing_object->ref_count == 1) {
1683                         /*
1684                          * If there is exactly one reference to the backing
1685                          * object, we can collapse it into the parent.  
1686                          */
1687                         vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
1688
1689                         /*
1690                          * Move the pager from backing_object to object.
1691                          */
1692                         if (backing_object->type == OBJT_SWAP) {
1693                                 vm_object_pip_add(backing_object, 1);
1694
1695                                 /*
1696                                  * Scrap the paging_offset junk and do a
1697                                  * discrete copy.  This also removes major
1698                                  * assumptions about how the swap pager
1699                                  * works from code where they don't belong.
1700                                  * The new swapper is able to optimize the
1701                                  * destroy-source case.
1702                                  */
1703
1704                                 vm_object_pip_add(object, 1);
1705                                 swap_pager_copy(
1706                                     backing_object,
1707                                     object,
1708                                     OFF_TO_IDX(object->backing_object_offset), TRUE);
1709                                 vm_object_pip_wakeup(object);
1710
1711                                 vm_object_pip_wakeup(backing_object);
1712                         }
1713                         /*
1714                          * Object now shadows whatever backing_object did.
1715                          * Note that the reference to 
1716                          * backing_object->backing_object moves from within 
1717                          * backing_object to within object.
1718                          */
1719
1720                         LIST_REMOVE(object, shadow_list);
1721                         object->backing_object->shadow_count--;
1722                         object->backing_object->generation++;
1723                         if (backing_object->backing_object) {
1724                                 LIST_REMOVE(backing_object, shadow_list);
1725                                 backing_object->backing_object->shadow_count--;
1726                                 backing_object->backing_object->generation++;
1727                         }
1728                         object->backing_object = backing_object->backing_object;
1729                         if (object->backing_object) {
1730                                 LIST_INSERT_HEAD(
1731                                     &object->backing_object->shadow_head,
1732                                     object, 
1733                                     shadow_list
1734                                 );
1735                                 object->backing_object->shadow_count++;
1736                                 object->backing_object->generation++;
1737                         }
1738
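                        /*
                         * The parent now maps through to the grandparent,
                         * so its offset into the new backing object is the
                         * sum of the two original offsets.
                         */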
1739                         object->backing_object_offset +=
1740                             backing_object->backing_object_offset;
1741
1742                         /*
1743                          * Discard backing_object.
1744                          *
1745                          * Since the backing object has no pages, no pager left,
1746                          * and no object references within it, all that is
1747                          * necessary is to dispose of it.
1748                          */
1749
1750                         KASSERT(backing_object->ref_count == 1,
1751                                 ("backing_object %p was somehow "
1752                                  "re-referenced during collapse!",
1753                                  backing_object));
1754                         KASSERT(RB_EMPTY(&backing_object->rb_memq),
1755                                 ("backing_object %p somehow has left "
1756                                  "over pages during collapse!",
1757                                  backing_object));
1758
1759                         /*
1760                          * Wait for hold count to hit zero
1761                          */
1762                         vm_object_drop(backing_object);
1763                         vm_object_hold_wait(backing_object);
1764
1765                         /* (we are holding vmobj_token) */
1766                         TAILQ_REMOVE(&vm_object_list, backing_object,
1767                                      object_list);
1768                         --backing_object->ref_count;    /* safety/debug */
1769                         vm_object_count--;
1770
1771                         zfree(obj_zone, backing_object);
1772
1773                         object_collapses++;
1774                 } else {
1775                         vm_object_t new_backing_object;
1776
1777                         /*
1778                          * If we do not entirely shadow the backing object,
1779                          * there is nothing we can do so we give up.
1780                          */
1781
1782                         if (vm_object_backing_scan(object, OBSC_TEST_ALL_SHADOWED) == 0) {
1783                                 vm_object_drop(backing_object);
1784                                 break;
1785                         }
1786
1787                         /*
1788                          * Make the parent shadow the next object in the
1789                          * chain.  Deallocating backing_object will not remove
1790                          * it, since its reference count is at least 2.
1791                          */
1792
1793                         LIST_REMOVE(object, shadow_list);
1794                         backing_object->shadow_count--;
1795                         backing_object->generation++;
1796
1797                         new_backing_object = backing_object->backing_object;
1798                         if ((object->backing_object = new_backing_object) != NULL) {
1799                                 vm_object_reference(new_backing_object);
1800                                 LIST_INSERT_HEAD(
1801                                     &new_backing_object->shadow_head,
1802                                     object,
1803                                     shadow_list
1804                                 );
1805                                 new_backing_object->shadow_count++;
1806                                 new_backing_object->generation++;
1807                                 object->backing_object_offset +=
1808                                         backing_object->backing_object_offset;
1809                         }
1810
1811                         /*
1812                          * Drop the reference count on backing_object. Since
1813                          * its ref_count was at least 2, it will not vanish,
1814                          * so we don't need to call vm_object_deallocate, but
1815                          * we do anyway.
1816                          */
1817                         vm_object_drop(backing_object);
1818                         vm_object_deallocate_locked(backing_object);
1819                         object_bypasses++;
1820                 }
1821
1822                 /*
1823                  * Try again with this object's new backing object.
1824                  */
1825         }
1826 }
1827
1828 /*
1829  * Removes all physical pages in the specified object range from the
1830  * object's list of pages.
1831  *
1832  * No requirements.
1833  */
1834 static int vm_object_page_remove_callback(vm_page_t p, void *data);
1835
1836 void
1837 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
1838                       boolean_t clean_only)
1839 {
1840         struct rb_vm_page_scan_info info;
1841         int all;
1842
1843         /*
1844          * Degenerate cases and assertions
1845          */
1846         lwkt_gettoken(&vm_token);
1847         if (object == NULL ||
1848             (object->resident_page_count == 0 && object->swblock_count == 0)) {
1849                 lwkt_reltoken(&vm_token);
1850                 return;
1851         }
1852         KASSERT(object->type != OBJT_PHYS, 
1853                 ("attempt to remove pages from a physical object"));
1854
1855         /*
1856          * Indicate that paging is occurring on the object.
1857          */
1858         vm_object_pip_add(object, 1);
1859
1860         /*
1861          * Figure out the actual removal range and whether we are removing
1862          * the entire contents of the object or not.  If removing the entire
1863          * contents, be sure to get all pages, even those that might be 
1864          * beyond the end of the object.
1865          */
1866         info.start_pindex = start;
1867         if (end == 0)
1868                 info.end_pindex = (vm_pindex_t)-1;
1869         else
1870                 info.end_pindex = end - 1;
1871         info.limit = clean_only;
1872         all = (start == 0 && info.end_pindex >= object->size - 1);
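        /*
         * 'all' is true when the entire object is being emptied; it is used
         * below to decide whether all swap space backing the object can be
         * released at once.
         */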
1873
1874         /*
1875          * Loop until we are sure we have gotten them all.
1876          */
1877         do {
1878                 info.error = 0;
1879                 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
1880                                         vm_object_page_remove_callback, &info);
1881         } while (info.error);
1882
1883         /*
1884          * Remove any related swap if throwing away pages, or for
1885          * non-swap objects (the swap is a clean copy in that case).
1886          */
1887         if (object->type != OBJT_SWAP || clean_only == FALSE) {
1888                 if (all)
1889                         swap_pager_freespace_all(object);
1890                 else
1891                         swap_pager_freespace(object, info.start_pindex,
1892                              info.end_pindex - info.start_pindex + 1);
1893         }
1894
1895         /*
1896          * Cleanup
1897          */
1898         vm_object_pip_wakeup(object);
1899         lwkt_reltoken(&vm_token);
1900 }
1901
1902 /*
1903  * The caller must hold vm_token.
1904  */
1905 static int
1906 vm_object_page_remove_callback(vm_page_t p, void *data)
1907 {
1908         struct rb_vm_page_scan_info *info = data;
1909
1910         /*
1911          * Wired pages cannot be destroyed, but they can be invalidated
1912          * and we do so if clean_only (limit) is not set.
1913          *
1914          * WARNING!  The page may be wired due to being part of a buffer
1915          *           cache buffer, and the buffer might be marked B_CACHE.
1916          *           This is fine as part of a truncation but VFSs must be
1917          *           sure to fix the buffer up when re-extending the file.
1918          */
1919         if (p->wire_count != 0) {
1920                 vm_page_protect(p, VM_PROT_NONE);
1921                 if (info->limit == 0)
1922                         p->valid = 0;
1923                 return(0);
1924         }
1925
1926         /*
1927          * The busy flags are only cleared at interrupt time --
1928          * minimize the spl transitions.
1929          */
1930
1931         if (vm_page_sleep_busy(p, TRUE, "vmopar")) {
1932                 info->error = 1;
1933                 return(0);
1934         }
1935
1936         /*
1937          * limit is our clean_only flag.  If set and the page is dirty, do
1938          * not free it.  If set and the page is being held by someone, do
1939          * not free it.
1940          */
1941         if (info->limit && p->valid) {
1942                 vm_page_test_dirty(p);
1943                 if (p->valid & p->dirty)
1944                         return(0);
1945                 if (p->hold_count)
1946                         return(0);
1947         }
1948
1949         /*
1950          * Destroy the page
1951          */
1952         vm_page_busy(p);
1953         vm_page_protect(p, VM_PROT_NONE);
1954         vm_page_free(p);
1955         return(0);
1956 }
1957
1958 /*
1959  * Coalesces two objects backing up adjoining regions of memory into a
1960  * single object.
1961  *
1962  * returns TRUE if objects were combined.
1963  *
1964  * NOTE: Only works at the moment if the second object is NULL -
1965  *       if it's not, which object do we lock first?
1966  *
1967  * Parameters:
1968  *      prev_object     First object to coalesce
1969  *      prev_offset     Offset into prev_object
1970  *      next_object     Second object to coalesce
1971  *      next_offset     Offset into next_object
1972  *
1973  *      prev_size       Size of reference to prev_object
1974  *      next_size       Size of reference to next_object
1975  *
1976  * The caller must hold vm_token and vmobj_token.
1977  *
1978  * The caller does not need to hold (prev_object) but must have a stable
1979  * pointer to it (typically by holding the vm_map locked).
1980  */
1981 boolean_t
1982 vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
1983                    vm_size_t prev_size, vm_size_t next_size)
1984 {
1985         vm_pindex_t next_pindex;
1986
1987         ASSERT_LWKT_TOKEN_HELD(&vm_token);
1988         ASSERT_LWKT_TOKEN_HELD(&vmobj_token);
1989
1990         if (prev_object == NULL) {
1991                 return (TRUE);
1992         }
1993
1994         vm_object_hold(prev_object);
1995
1996         if (prev_object->type != OBJT_DEFAULT &&
1997             prev_object->type != OBJT_SWAP) {
1998                 vm_object_drop(prev_object);
1999                 return (FALSE);
2000         }
2001
2002         /*
2003          * Try to collapse the object first
2004          */
2005         vm_object_collapse(prev_object);
2006
2007         /*
2008          * Can't coalesce if the object has more than one reference, is paged
2009          * out, shadows another object, or has a copy elsewhere (any of which
2010          * mean that the pages not mapped to prev_entry may be in use anyway).
2011          */
2012
2013         if (prev_object->backing_object != NULL) {
2014                 vm_object_drop(prev_object);
2015                 return (FALSE);
2016         }
2017
2018         prev_size >>= PAGE_SHIFT;
2019         next_size >>= PAGE_SHIFT;
2020         next_pindex = prev_pindex + prev_size;
2021
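        /*
         * If other references to prev_object exist we can only coalesce
         * when the new range begins exactly at the current end of the
         * object; otherwise the extended pages could conflict with pages
         * that the other references may already be using.
         */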
2022         if ((prev_object->ref_count > 1) &&
2023             (prev_object->size != next_pindex)) {
2024                 vm_object_drop(prev_object);
2025                 return (FALSE);
2026         }
2027
2028         /*
2029          * Remove any pages that may still be in the object from a previous
2030          * deallocation.
2031          */
2032         if (next_pindex < prev_object->size) {
2033                 vm_object_page_remove(prev_object,
2034                                       next_pindex,
2035                                       next_pindex + next_size, FALSE);
2036                 if (prev_object->type == OBJT_SWAP)
2037                         swap_pager_freespace(prev_object,
2038                                              next_pindex, next_size);
2039         }
2040
2041         /*
2042          * Extend the object if necessary.
2043          */
2044         if (next_pindex + next_size > prev_object->size)
2045                 prev_object->size = next_pindex + next_size;
2046
2047         vm_object_drop(prev_object);
2048         return (TRUE);
2049 }
2050
2051 /*
2052  * Make the object writable and flag it as possibly being dirty.
2053  *
2054  * No requirements.
2055  */
2056 void
2057 vm_object_set_writeable_dirty(vm_object_t object)
2058 {
2059         struct vnode *vp;
2060
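        /*
         * Besides flagging the object itself, vnode-backed objects also get
         * VOBJDIRTY set on their vnode so that the vnode flushing/sync code
         * can tell that the object may contain dirty pages.
         */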
2061         lwkt_gettoken(&vm_token);
2062         vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
2063         if (object->type == OBJT_VNODE &&
2064             (vp = (struct vnode *)object->handle) != NULL) {
2065                 if ((vp->v_flag & VOBJDIRTY) == 0) {
2066                         vsetflags(vp, VOBJDIRTY);
2067                 }
2068         }
2069         lwkt_reltoken(&vm_token);
2070 }
2071
2072 #include "opt_ddb.h"
2073 #ifdef DDB
2074 #include <sys/kernel.h>
2075
2076 #include <sys/cons.h>
2077
2078 #include <ddb/ddb.h>
2079
2080 static int      _vm_object_in_map (vm_map_t map, vm_object_t object,
2081                                        vm_map_entry_t entry);
2082 static int      vm_object_in_map (vm_object_t object);
2083
2084 /*
2085  * The caller must hold vm_token.
2086  */
2087 static int
2088 _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
2089 {
2090         vm_map_t tmpm;
2091         vm_map_entry_t tmpe;
2092         vm_object_t obj;
2093         int entcount;
2094
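        /*
         * Walk the map's entries (recursing into submaps) and follow each
         * entry's backing-object chain, returning 1 if the object is found.
         */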
2095         if (map == 0)
2096                 return 0;
2097         if (entry == 0) {
2098                 tmpe = map->header.next;
2099                 entcount = map->nentries;
2100                 while (entcount-- && (tmpe != &map->header)) {
2101                         if (_vm_object_in_map(map, object, tmpe)) {
2102                                 return 1;
2103                         }
2104                         tmpe = tmpe->next;
2105                 }
2106                 return (0);
2107         }
2108         switch(entry->maptype) {
2109         case VM_MAPTYPE_SUBMAP:
2110                 tmpm = entry->object.sub_map;
2111                 tmpe = tmpm->header.next;
2112                 entcount = tmpm->nentries;
2113                 while (entcount-- && tmpe != &tmpm->header) {
2114                         if (_vm_object_in_map(tmpm, object, tmpe)) {
2115                                 return 1;
2116                         }
2117                         tmpe = tmpe->next;
2118                 }
2119                 break;
2120         case VM_MAPTYPE_NORMAL:
2121         case VM_MAPTYPE_VPAGETABLE:
2122                 obj = entry->object.vm_object;
2123                 while (obj) {
2124                         if (obj == object)
2125                                 return 1;
2126                         obj = obj->backing_object;
2127                 }
2128                 break;
2129         default:
2130                 break;
2131         }
2132         return 0;
2133 }
2134
2135 static int vm_object_in_map_callback(struct proc *p, void *data);
2136
2137 struct vm_object_in_map_info {
2138         vm_object_t object;
2139         int rv;
2140 };
2141
2142 /*
2143  * Debugging only
2144  */
2145 static int
2146 vm_object_in_map(vm_object_t object)
2147 {
2148         struct vm_object_in_map_info info;
2149
2150         info.rv = 0;
2151         info.object = object;
2152
2153         allproc_scan(vm_object_in_map_callback, &info);
2154         if (info.rv)
2155                 return 1;
2156         if (_vm_object_in_map(&kernel_map, object, 0))
2157                 return 1;
2158         if (_vm_object_in_map(&pager_map, object, 0))
2159                 return 1;
2160         if (_vm_object_in_map(&buffer_map, object, 0))
2161                 return 1;
2162         return 0;
2163 }
2164
2165 /*
2166  * Debugging only
2167  */
2168 static int
2169 vm_object_in_map_callback(struct proc *p, void *data)
2170 {
2171         struct vm_object_in_map_info *info = data;
2172
2173         if (p->p_vmspace) {
2174                 if (_vm_object_in_map(&p->p_vmspace->vm_map, info->object, 0)) {
2175                         info->rv = 1;
2176                         return -1;
2177                 }
2178         }
2179         return (0);
2180 }
2181
2182 DB_SHOW_COMMAND(vmochk, vm_object_check)
2183 {
2184         vm_object_t object;
2185
2186         /*
2187          * Make sure that internal objects are in a map somewhere
2188          * and that none have zero ref counts.
2189          */
2190         for (object = TAILQ_FIRST(&vm_object_list);
2191                         object != NULL;
2192                         object = TAILQ_NEXT(object, object_list)) {
2193                 if (object->type == OBJT_MARKER)
2194                         continue;
2195                 if (object->handle == NULL &&
2196                     (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2197                         if (object->ref_count == 0) {
2198                                 db_printf("vmochk: internal obj has zero ref count, size: %ld\n",
2199                                         (long)object->size);
2200                         }
2201                         if (!vm_object_in_map(object)) {
2202                                 db_printf(
2203                         "vmochk: internal obj is not in a map: "
2204                         "ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
2205                                     object->ref_count, (u_long)object->size, 
2206                                     (u_long)object->size,
2207                                     (void *)object->backing_object);
2208                         }
2209                 }
2210         }
2211 }
2212
2213 /*
2214  * Debugging only
2215  */
2216 DB_SHOW_COMMAND(object, vm_object_print_static)
2217 {
2218         /* XXX convert args. */
2219         vm_object_t object = (vm_object_t)addr;
2220         boolean_t full = have_addr;
2221
2222         vm_page_t p;
2223
2224         /* XXX count is an (unused) arg.  Avoid shadowing it. */
2225 #define count   was_count
2226
2227         int count;
2228
2229         if (object == NULL)
2230                 return;
2231
2232         db_iprintf(
2233             "Object %p: type=%d, size=0x%lx, res=%d, ref=%d, flags=0x%x\n",
2234             object, (int)object->type, (u_long)object->size,
2235             object->resident_page_count, object->ref_count, object->flags);
2236         /*
2237          * XXX no %qd in kernel.  Truncate object->backing_object_offset.
2238          */
2239         db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%lx\n",
2240             object->shadow_count, 
2241             object->backing_object ? object->backing_object->ref_count : 0,
2242             object->backing_object, (long)object->backing_object_offset);
2243
2244         if (!full)
2245                 return;
2246
2247         db_indent += 2;
2248         count = 0;
2249         RB_FOREACH(p, vm_page_rb_tree, &object->rb_memq) {
2250                 if (count == 0)
2251                         db_iprintf("memory:=");
2252                 else if (count == 6) {
2253                         db_printf("\n");
2254                         db_iprintf(" ...");
2255                         count = 0;
2256                 } else
2257                         db_printf(",");
2258                 count++;
2259
2260                 db_printf("(off=0x%lx,page=0x%lx)",
2261                     (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p));
2262         }
2263         if (count != 0)
2264                 db_printf("\n");
2265         db_indent -= 2;
2266 }
2267
2268 /* XXX. */
2269 #undef count
2270
2271 /*
2272  * XXX need this non-static entry for calling from vm_map_print.
2273  *
2274  * Debugging only
2275  */
2276 void
2277 vm_object_print(/* db_expr_t */ long addr,
2278                 boolean_t have_addr,
2279                 /* db_expr_t */ long count,
2280                 char *modif)
2281 {
2282         vm_object_print_static(addr, have_addr, count, modif);
2283 }
2284
2285 /*
2286  * Debugging only
2287  */
2288 DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
2289 {
2290         vm_object_t object;
2291         int nl = 0;
2292         int c;
2293         for (object = TAILQ_FIRST(&vm_object_list);
2294                         object != NULL;
2295                         object = TAILQ_NEXT(object, object_list)) {
2296                 vm_pindex_t idx, fidx;
2297                 vm_pindex_t osize;
2298                 vm_paddr_t pa = -1, padiff;
2299                 int rcount;
2300                 vm_page_t m;
2301
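                /*
                 * For each object, print runs of resident pages that are
                 * physically contiguous (start index, run length, starting
                 * physical address), pausing after roughly 18 lines of
                 * output.
                 */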
2302                 if (object->type == OBJT_MARKER)
2303                         continue;
2304                 db_printf("new object: %p\n", (void *)object);
2305                 if (nl > 18) {
2306                         c = cngetc();
2307                         if (c != ' ')
2308                                 return;
2309                         nl = 0;
2310                 }
2311                 nl++;
2312                 rcount = 0;
2313                 fidx = 0;
2314                 osize = object->size;
2315                 if (osize > 128)
2316                         osize = 128;
2317                 for (idx = 0; idx < osize; idx++) {
2318                         m = vm_page_lookup(object, idx);
2319                         if (m == NULL) {
2320                                 if (rcount) {
2321                                         db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2322                                                 (long)fidx, rcount, (long)pa);
2323                                         if (nl > 18) {
2324                                                 c = cngetc();
2325                                                 if (c != ' ')
2326                                                         return;
2327                                                 nl = 0;
2328                                         }
2329                                         nl++;
2330                                         rcount = 0;
2331                                 }
2332                                 continue;
2333                         }
2334
2335                                 
2336                         if (rcount &&
2337                                 (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
2338                                 ++rcount;
2339                                 continue;
2340                         }
2341                         if (rcount) {
2342                                 padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
2343                                 padiff >>= PAGE_SHIFT;
2344                                 padiff &= PQ_L2_MASK;
2345                                 if (padiff == 0) {
2346                                         pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
2347                                         ++rcount;
2348                                         continue;
2349                                 }
2350                                 db_printf(" index(%ld)run(%d)pa(0x%lx)",
2351                                         (long)fidx, rcount, (long)pa);
2352                                 db_printf("pd(%ld)\n", (long)padiff);
2353                                 if (nl > 18) {
2354                                         c = cngetc();
2355                                         if (c != ' ')
2356                                                 return;
2357                                         nl = 0;
2358                                 }
2359                                 nl++;
2360                         }
2361                         fidx = idx;
2362                         pa = VM_PAGE_TO_PHYS(m);
2363                         rcount = 1;
2364                 }
2365                 if (rcount) {
2366                         db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2367                                 (long)fidx, rcount, (long)pa);
2368                         if (nl > 18) {
2369                                 c = cngetc();
2370                                 if (c != ' ')
2371                                         return;
2372                                 nl = 0;
2373                         }
2374                         nl++;
2375                 }
2376         }
2377 }
2378 #endif /* DDB */