1 /******************************************************************************
3 VERSION $FreeBSD: src/lib/libc/db/mpool/mpool.libtp,v 1.4 1999/08/27 23:58:23 peter Exp $
4 VERSION $DragonFly: src/lib/libcr/db/mpool/Attic/mpool.libtp,v 1.2 2003/06/17 04:26:42 dillon Exp $
5 PACKAGE: User Level Shared Memory Manager
8 This package provides a buffer pool interface implemented as
9 a collection of file pages mapped into shared memory.
11 Based on Mark's buffer manager
31 ******************************************************************************/
32 #include <sys/types.h>
46 we need to translate between some type of file id that the user
47 process passes and a file descriptor. For now, it's a nop.
49 #define GET_MASTER get_sem ( buf_spinlock ) /* get_sem() on this process's buf_spinlock handle */
50 #define RELEASE_MASTER release_sem ( buf_spinlock ) /* release_sem() on the same handle; call sites test the result in an if */
52 #define LRUID *buf_lru /* the buffer index stored in the LRU slot */
53 #define LRUP (bufhdr_table+*buf_lru) /* address of the buffer header at that index */
54 #define MRU bufhdr_table[*buf_lru].lru.prev /* lru.prev link of that header */
56 /* Global indicator that you have started reusing buffers */
57 int do_statistics = 0;
59 Process Statics (pointers into shared memory)
61 static BUF_T *buf_table = 0; /* Start of the attached region: the page buffers */
62 static BUFHDR_T *bufhdr_table; /* Buffer headers, placed after NUM_BUFS buffers */
63 static int *buf_hash_table; /* Hash chain heads, placed after NUM_BUFS headers; NUM_BUFS marks an empty chain */
64 static int *buf_lru; /* LRU is the free list */
65 static int buf_spinlock; /* Handle returned by create_sem() */
66 static FINFO_T *buf_fids; /* File ID table with NUM_FILE_ENTRIES entries */
67 static int *buf_sp; /* Pointer to string free space */
68 static char *buf_strings; /* File name storage, placed right after the buf_sp slot; names are found via buf_fids[].offset */
70 /* Process Local FID->FD table */
71 static int fds[NUM_FILE_ENTRIES]; /* -1 means no descriptor is recorded for that fid */
74 static BUFHDR_T *bf_assign_buf(); /* old-style declarations: parameter lists appear only at the definitions */
75 static int bf_fid_to_fd();
76 static BUFHDR_T *bf_newbuf();
77 static int bf_put_page();
93 Initialize Process local structures
95 for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) {
99 buf_region = attach_region ( BUF_REGION_NAME, BUF_REGION_NUM,
100 BUF_REGION_SIZE, &ref_count );
104 error_log3 ( "Buf Region: ADDR: %d ID: %d SIZE: %d\n", buf_region,
105 BUF_REGION_NUM, BUF_REGION_SIZE );
107 buf_table = (BUF_T *)buf_region;
108 bufhdr_table = (BUFHDR_T *)(buf_table + NUM_BUFS);
109 buf_hash_table = (int *)(bufhdr_table + NUM_BUFS);
110 buf_lru = buf_hash_table + NUMTABLE_ENTRIES;
111 spinlockp = buf_lru + 1;
112 buf_fids = (FINFO_T *)(spinlockp+1);
113 buf_sp = (int *)(buf_fids + NUM_FILE_ENTRIES);
114 buf_strings = (char *)(buf_sp + 1);
116 /* Create locking spinlock (it is created already holding the lock) */
117 buf_spinlock = create_sem ( BUF_SPIN_NAME, BUF_SPIN_NUM, ref_count <= 1 );
118 if ( buf_spinlock < 0 ) {
121 if ( ref_count <= 1 ) {
122 *spinlockp = buf_spinlock;
124 /* Now initialize the buffer manager */
129 /* 2. Buffer headers */
130 for ( i = 0, bhp = bufhdr_table; i < NUM_BUFS; bhp++, i++ ) {
133 bhp->flags = 0; /* All Flags off */
135 bhp->wait_proc = -1; /* No sleepers */
136 LISTPE_INIT ( hash, bhp, i ); /* Hash chains */
138 bufhdr_table[0].lru.prev = NUM_BUFS-1;
139 bufhdr_table[NUM_BUFS-1].lru.next = 0;
142 for ( i = 0; i < NUMTABLE_ENTRIES; i++ ) {
143 buf_hash_table[i] = NUM_BUFS;
146 /* 4. File ID Table */
147 for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) {
148 buf_fids[i].offset = -1;
149 buf_fids[i].npages = -1;
150 buf_fids[i].refcount = 0;
153 /* 5. Free String Pointer */
154 *buf_sp = (FILE_NAME_LEN*NUM_FILE_ENTRIES);
155 if (RELEASE_MASTER) {
158 error_log0 ( "Initialized buffer region\n" );
169 /* Flush Buffer Pool on Exit */
170 for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) {
171 if ( fds[i] != -1 ) {
176 detach_region ( buf_table, BUF_REGION_NUM, BUF_REGION_SIZE, &ref );
182 We need an empty buffer. Find the LRU unpinned NON-Dirty page.
195 bhp->flags & (BUF_PINNED|BUF_IO_IN_PROGRESS);
196 bhp = LISTP_NEXTP (bufhdr_table, lru, bhp ) ) {
198 if ( bhp->lru.next == lruid ) {
200 error_log1 ( "All buffers are pinned. %s\n",
201 "Unable to grant buffer request" );
205 /* BHP can be used */
206 if ( bhp->flags & BUF_DIRTY ) {
209 MIS Check for log flushed appropriately
211 fd = bf_fid_to_fd(bhp->id.file_id);
213 error_log1 ("Invalid fid %d\n", bhp->id.file_id);
216 if ( bf_put_page(fd, bhp) < 0 ) {
220 /* Update Hash Pointers */
221 ndx = BUF_HASH ( bhp->id.file_id, bhp->id.obj_id );
222 LISTP_REMOVE(bufhdr_table, hash, bhp);
223 if ( buf_hash_table[ndx] == (bhp-bufhdr_table) ) {
224 if ( bhp->hash.next != (bhp-bufhdr_table) ) {
225 buf_hash_table[ndx] = bhp->hash.next;
227 buf_hash_table[ndx] = NUM_BUFS;
237 Add a page to a file and return a buffer for it.
241 buf_alloc ( fid, new_pageno )
254 if ( buf_fids[fid].npages == -1 ) {
255 /* initialize npages field */
256 fd = bf_fid_to_fd ( fid );
258 assert (fid < NUM_FILE_ENTRIES);
260 *new_pageno = buf_fids[fid].npages;
261 if ( *new_pageno == -1 ) {
265 buf_fids[fid].npages++;
266 ndx = BUF_HASH ( fid, *new_pageno );
268 fobj.obj_id = *new_pageno;
269 bhp = bf_assign_buf ( ndx, &fobj, BF_PIN|BF_DIRTY|BF_EMPTY, &len );
270 if ( RELEASE_MASTER ) {
275 return ((ADDR_T)(buf_table+(bhp-bufhdr_table)));
284 BF_DIRTY Mark page as dirty
285 BF_EMPTY Don't initialize page, just get buffer
286 BF_PIN Retrieve with pin
289 Might want to add a flag that sets an LSN for this buffer if the
292 Eventually, you may want a flag that indicates the I/O and lock
293 request should be shipped off together, but not for now.
296 buf_get ( file_id, page_id, flags, len )
300 int *len; /* Number of bytes read into buffer */
310 ndx = BUF_HASH ( file_id, page_id );
311 fobj.file_id = (long) file_id;
312 fobj.obj_id = (long) page_id;
317 This could be a for loop, but we lose speed
318 by making all the cases general purpose so we
319 optimize for the no-collision case.
321 bufid = buf_hash_table[ndx];
322 if ( bufid < NUM_BUFS ) {
323 for ( bhp = bufhdr_table+bufid;
324 !OBJ_EQ (bhp->id, fobj) || !(bhp->flags & BUF_VALID);
325 bhp = LISTP_NEXTP ( bufhdr_table, hash, bhp ) ) {
327 if ( bhp->hash.next == bufid ) {
332 if ( flags & BF_PIN ) {
333 bhp->flags |= BUF_PINNED;
336 fprintf(stderr, "buf_get: %X PINNED (%d)\n",
337 buf_table + (bhp-bufhdr_table), bhp->refcount);
340 if ( flags & BF_DIRTY ) {
341 bhp->flags |= BUF_DIRTY;
344 while ( bhp->flags & BUF_IO_IN_PROGRESS ) {
345 /* MIS -- eventually err check here */
347 printf("About to sleep on %d (me: %d\n)\n", bhp->wait_proc,
348 my_txnp - txn_table);
353 stat = proc_sleep_on ( &(bhp->wait_proc), buf_spinlock );
358 if (!( bhp->flags & BUF_IO_IN_PROGRESS) &&
359 (!OBJ_EQ (bhp->id, fobj) || !(bhp->flags & BUF_VALID))) {
362 return(buf_get ( file_id, page_id, flags, len ));
369 /* If you get here, the page isn't in the hash table */
370 bhp = bf_assign_buf ( ndx, &fobj, flags, len );
372 /* Common code between found and not found */
374 if ( bhp && bhp->flags & BUF_NEWPAGE ) {
382 return ((ADDR_T)(buf_table+(bhp-bufhdr_table)));
389 MIS - do I want to add file links to buffer pool?
392 buf_sync ( fid, close )
394 int close; /* should we dec refcount and possibly
395 invalidate all the buffers */
402 if ( (fd = bf_fid_to_fd ( fid )) < 0 ) {
408 invalidate = (buf_fids[fid].refcount == 1 && close);
410 for ( bhp = bufhdr_table, i = 0; i < NUM_BUFS; bhp++, i++ ) {
411 if (bhp->id.file_id == fid) {
412 if ((bhp->flags & BUF_DIRTY) && (bf_put_page( fd, bhp ) < 0)) { /* header flags use the BUF_* bits; BF_DIRTY is a caller request flag */
415 bhp->id.file_id = -1;
418 if (invalidate || close)
419 buf_fids[fid].refcount--;
421 if (RELEASE_MASTER) {
430 buf_flags ( addr, set_flags, unset_flags )
439 fprintf(stderr, "buf_flags: %X setting %s%s%s%s%s releasing %s%s%s%s%s\n",
441 set_flags&BUF_DIRTY ? "DIRTY " : "",
442 set_flags&BUF_VALID ? "VALID " : "",
443 set_flags&BUF_PINNED ? "PINNED " : "",
444 set_flags&BUF_IO_ERROR ? "IO_ERROR " : "",
445 set_flags&BUF_IO_IN_PROGRESS ? "IO_IN_PROG " : "",
446 set_flags&BUF_NEWPAGE ? "NEWPAGE " : "",
447 unset_flags&BUF_DIRTY ? "DIRTY " : "",
448 unset_flags&BUF_VALID ? "VALID " : "",
449 unset_flags&BUF_PINNED ? "PINNED " : "",
450 unset_flags&BUF_IO_ERROR ? "IO_ERROR " : "",
451 unset_flags&BUF_IO_IN_PROGRESS ? "IO_IN_PROG " : "",
452 unset_flags&BUF_NEWPAGE ? "NEWPAGE " : "" );
454 if (!ADDR_OK(addr)) {
455 error_log1 ( "buf_flags: Invalid Buffer Address %x\n", addr ); /* name the reporting function, matching the buf_flags debug trace */
458 bufid = ((BUF_T *)addr) - buf_table;
459 assert ( bufid < NUM_BUFS);
460 bhp = &bufhdr_table[bufid];
464 bhp->flags |= set_flags;
465 if ( set_flags & BUF_PINNED ) {
468 if ( set_flags & BUF_DIRTY ) {
469 unset_flags |= BUF_NEWPAGE;
472 if ( unset_flags & BUF_PINNED ) {
474 if ( bhp->refcount ) {
475 /* Turn off pin bit so it doesn't get unset */
476 unset_flags &= ~BUF_PINNED;
479 bhp->flags &= ~unset_flags;
481 if (RELEASE_MASTER) {
488 Take a string name and produce an fid.
492 MIS -- this is a potential problem -- you keep actual names
493 here -- what if people run from different directories?
496 buf_name_lookup ( fname )
507 for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) {
508 if ( buf_fids[i].offset == -1 ) {
511 if (!strcmp (fname, buf_strings+buf_fids[i].offset)) {
512 if (RELEASE_MASTER) {
515 buf_fids[i].refcount++;
521 error_log0 ( "No more file ID's\n" );
523 ndx = *buf_sp - strlen(fname) - 1;
525 error_log0 ( "Out of string space\n" );
529 strcpy ( buf_strings+ndx, fname );
530 buf_fids[fid].offset = ndx;
532 buf_fids[fid].refcount = 1;
534 if (RELEASE_MASTER) {
546 assert ( (fid < NUM_FILE_ENTRIES) && (buf_fids[fid].offset != -1) );
547 if ( fds[fid] != -1 ) {
551 fds[fid] = open ( buf_strings+buf_fids[fid].offset, O_RDWR|O_CREAT,
553 if ( fds[fid] < 0 ) {
554 error_log3 ( "Error Opening File %s FID: %d FD: %d. Errno = %d\n",
555 buf_strings+buf_fids[fid].offset, fid, fds[fid],
559 error_log3 ( "Opening File %s FID: %d FD: %d\n",
560 buf_strings+buf_fids[fid].offset, fid, fds[fid] );
561 if ( buf_fids[fid].npages == -1 ) {
562 /* Initialize the npages field */
563 if ( fstat ( fds[fid], &sbuf ) ) {
564 error_log3 ( "Error Fstating %s FID: %d. Errno = %d\n",
565 buf_strings+buf_fids[fid].offset, fid, errno );
567 buf_fids[fid].npages = ( sbuf.st_size / BUFSIZE );
575 bf_put_page ( fd, bhp )
581 assert ( (bhp-bufhdr_table) < NUM_BUFS );
582 if ( lseek ( fd, bhp->id.obj_id << BUFSHIFT, L_SET ) < 0 ) {
585 bhp->flags |= BUF_IO_IN_PROGRESS;
586 if (RELEASE_MASTER) {
589 nbytes = write(fd, buf_table[bhp-bufhdr_table], BUFSIZE);
594 error_log1 ("Write failed with error code %d\n", errno);
596 } else if ( nbytes != BUFSIZE ) {
597 error_log2 ("Short write: %d bytes of %d\n", nbytes, BUFSIZE ); /* two values are logged, so use the two-value logger */
599 bhp->flags &= ~(BUF_DIRTY|BUF_IO_IN_PROGRESS);
604 bf_assign_buf ( ndx, obj, flags, len )
608 int *len; /* Number of bytes read */
613 assert ( obj->file_id < NUM_FILE_ENTRIES );
618 OBJ_ASSIGN ( (*obj), bhp->id );
619 if ( buf_hash_table[ndx] >= NUM_BUFS ) {
620 buf_hash_table[ndx] = bhp-bufhdr_table;
622 LISTPE_INSERT ( bufhdr_table, hash, bhp, buf_hash_table[ndx] );
625 bhp->flags |= BUF_VALID;
626 if ( flags & BF_PIN ) {
627 bhp->flags |= BUF_PINNED;
630 fprintf(stderr, "bf_assign_buf: %X PINNED (%d)\n",
631 buf_table + (bhp-bufhdr_table), bhp->refcount);
634 fd = bf_fid_to_fd(obj->file_id);
636 error_log1 ("Invalid fid %d\n", obj->file_id);
637 bhp->flags |= ~BUF_IO_ERROR;
640 if ( obj->obj_id >= buf_fids[obj->file_id].npages) {
641 buf_fids[obj->file_id].npages = obj->obj_id+1;
643 } else if ( flags & BF_EMPTY ) {
646 bhp->flags |= BUF_IO_IN_PROGRESS;
647 if (RELEASE_MASTER) {
650 if ( lseek ( fd, obj->obj_id << BUFSHIFT, L_SET ) < -1 ) {
651 error_log2 ("Unable to perform seek on file: %d to page %d",
652 obj->file_id, obj->obj_id );
653 bhp->flags &= ~BUF_IO_IN_PROGRESS;
654 bhp->flags |= ~BUF_IO_ERROR;
657 *len = read(fd, buf_table[bhp-bufhdr_table], BUFSIZE);
659 error_log2 ("Unable to perform read on file: %d to page %d",
660 obj->file_id, obj->obj_id );
661 bhp->flags &= ~BUF_IO_IN_PROGRESS;
662 bhp->flags |= ~BUF_IO_ERROR;
668 bhp->flags &= ~BUF_IO_IN_PROGRESS;
669 if ( bhp->wait_proc != -1 ) {
670 /* wake up waiter and anyone waiting on it */
672 printf("Waking transaction %d due to completed I/O\n",
675 proc_wake_id ( bhp->wait_proc );
681 if ( flags & BF_DIRTY ) {
682 bhp->flags |= BUF_DIRTY;
683 } else if ( *len < BUFSIZE ) {
684 bhp->flags |= BUF_NEWPAGE;
698 assert ( fid < NUM_FILE_ENTRIES );
699 if ( buf_fids[fid].npages == -1 ) {
700 /* initialize npages field */
701 (void) bf_fid_to_fd ( fid );
703 val = buf_fids[fid].npages;
705 val--; /* Convert to page number */
707 if (RELEASE_MASTER) {
722 printf ( "LRU + %d\n", *buf_lru );
724 printf("ID\tFID\tPID\tLNEXT\tLPREV\tHNEXT\tHPREV\tSLEEP\tFLAG\tREFS\n");
725 for ( bhp = bufhdr_table, i = 0; i < NUM_BUFS; bhp++, i++ ) {
726 printf ( "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%x\t%d\n", i,
727 bhp->id.file_id, bhp->id.obj_id,
728 bhp->lru.next, bhp->lru.prev,
729 bhp->hash.next, bhp->hash.prev,
730 bhp->wait_proc, bhp->flags, bhp->refcount );
733 if ( id >= NUM_BUFS ) {
734 printf ( "Buffer ID (%d) too high\n", id );
737 bhp = bufhdr_table+id;
738 printf ( "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%x\t%d\n", id, /* print the requested id; NOTE(review): i is only seen assigned by the dump-all loop */
739 bhp->id.file_id, bhp->id.obj_id,
740 bhp->lru.next, bhp->lru.prev,
741 bhp->hash.next, bhp->hash.prev,
742 bhp->wait_proc, bhp->flags, bhp->refcount );