EXT2FS: support variable inode size.
[dragonfly.git] / sys / vfs / gnu / ext2fs / ext2_linux_ialloc.c
1 /*
2  *  modified for Lites 1.1
3  *
4  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5  *  University of Utah, Department of Computer Science
6  *
7  * $FreeBSD: src/sys/gnu/ext2fs/ext2_linux_ialloc.c,v 1.13.2.2 2001/08/14 18:03:19 gallatin Exp $
8  * $DragonFly: src/sys/vfs/gnu/ext2fs/ext2_linux_ialloc.c,v 1.12 2007/09/23 04:09:55 yanyh Exp $
9  */
10 /*
11  *  linux/fs/ext2/ialloc.c
12  *
13  * Copyright (C) 1992, 1993, 1994, 1995
14  * Remy Card (card@masi.ibp.fr)
15  * Laboratoire MASI - Institut Blaise Pascal
16  * Universite Pierre et Marie Curie (Paris VI)
17  *
18  *  BSD ufs-inspired inode and directory allocation by 
19  *  Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
20  */
21
22 /*
23  * The free inodes are managed by bitmaps.  A file system contains several
24  * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
25  * block for inodes, N blocks for the inode table and data blocks.
26  *
27  * The file system contains group descriptors which are located after the
28  * super block.  Each descriptor contains the number of the bitmap block and
29  * the free blocks count in the group.  The descriptors are loaded in memory
30  * when a file system is mounted (see ext2_read_super).
31  */
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/buf.h>
36 #include <sys/proc.h>
37 #include <sys/mount.h>
38 #include <sys/vnode.h>
39
40 #include "quota.h"
41 #include "inode.h"
42 #include "ext2mount.h"
43 #include "ext2_extern.h"
44 #include "ext2_fs.h"
45 #include "ext2_fs_sb.h"
46 #include "fs.h"
47 #include <sys/stat.h>
48 #include <sys/buf2.h>
49 #include <sys/thread2.h>
50
51 #ifdef __i386__
52 #include "i386-bitops.h"
53 #else
54 #include "ext2_bitops.h"
55 #endif
56
57 /* this is supposed to mark a buffer dirty on ready for delayed writing
58  */
59 void
60 mark_buffer_dirty(struct buf *bh)
61 {
62         crit_enter();
63         bh->b_flags |= B_DIRTY;
64         crit_exit();
65
66
67 struct ext2_group_desc *
68 get_group_desc(struct mount * mp, unsigned int block_group,
69                struct buffer_head **bh)
70 {
71         struct ext2_sb_info *sb = VFSTOEXT2(mp)->um_e2fs;
72         unsigned long group_desc;
73         unsigned long desc;
74         struct ext2_group_desc * gdp;
75
76         if (block_group >= sb->s_groups_count)
77                 panic ("get_group_desc: "
78                             "block_group >= groups_count - "
79                             "block_group = %d, groups_count = %lu",
80                             block_group, sb->s_groups_count);
81
82         group_desc = block_group / EXT2_DESC_PER_BLOCK(sb);
83         desc = block_group % EXT2_DESC_PER_BLOCK(sb);
84         if (!sb->s_group_desc[group_desc])
85                 panic ( "get_group_desc:"
86                             "Group descriptor not loaded - "
87                             "block_group = %d, group_desc = %lu, desc = %lu",
88                              block_group, group_desc, desc);
89         gdp = (struct ext2_group_desc *) 
90                 sb->s_group_desc[group_desc]->b_data;
91         if (bh)
92                 *bh = sb->s_group_desc[group_desc];
93         return gdp + desc;
94 }
95
96 static void
97 read_inode_bitmap(struct mount *mp, unsigned long block_group,
98                   unsigned int bitmap_nr)
99 {
100         struct ext2_sb_info *sb = VFSTOEXT2(mp)->um_e2fs;
101         struct ext2_group_desc * gdp;
102         struct buffer_head * bh;
103         int     error;
104
105         gdp = get_group_desc (mp, block_group, NULL);
106         if ((error = bread (VFSTOEXT2(mp)->um_devvp, 
107                             fsbtodoff(sb, gdp->bg_inode_bitmap), 
108                             sb->s_blocksize, &bh)) != 0)
109                 panic ( "read_inode_bitmap:"
110                             "Cannot read inode bitmap - "
111                             "block_group = %lu, inode_bitmap = %lu",
112                             block_group, (unsigned long) gdp->bg_inode_bitmap);
113         sb->s_inode_bitmap_number[bitmap_nr] = block_group;
114         sb->s_inode_bitmap[bitmap_nr] = bh;
115         LCK_BUF(bh)
116 }
117
118 /*
119  * load_inode_bitmap loads the inode bitmap for a blocks group
120  *
121  * It maintains a cache for the last bitmaps loaded.  This cache is managed
122  * with a LRU algorithm.
123  *
124  * Notes:
125  * 1/ There is one cache per mounted file system.
126  * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups,
127  *    this function reads the bitmap without maintaining a LRU cache.
128  */
129 static int
130 load_inode_bitmap(struct mount *mp, unsigned int block_group)
131 {
132         struct ext2_sb_info *sb = VFSTOEXT2(mp)->um_e2fs;
133         int i, j;
134         unsigned long inode_bitmap_number;
135         struct buffer_head * inode_bitmap;
136
137         if (block_group >= sb->s_groups_count)
138                 panic ("load_inode_bitmap:"
139                             "block_group >= groups_count - "
140                             "block_group = %d, groups_count = %lu",
141                              block_group, sb->s_groups_count);
142         if (sb->s_loaded_inode_bitmaps > 0 &&
143             sb->s_inode_bitmap_number[0] == block_group)
144                 return 0;
145         if (sb->s_groups_count <= EXT2_MAX_GROUP_LOADED) {
146                 if (sb->s_inode_bitmap[block_group]) {
147                         if (sb->s_inode_bitmap_number[block_group] != 
148                                 block_group)
149                                 panic ( "load_inode_bitmap:"
150                                     "block_group != inode_bitmap_number");
151                         else
152                                 return block_group;
153                 } else {
154                         read_inode_bitmap (mp, block_group, block_group);
155                         return block_group;
156                 }
157         }
158
159         for (i = 0; i < sb->s_loaded_inode_bitmaps &&
160                     sb->s_inode_bitmap_number[i] != block_group;
161              i++)
162                 ;
163         if (i < sb->s_loaded_inode_bitmaps &&
164             sb->s_inode_bitmap_number[i] == block_group) {
165                 inode_bitmap_number = sb->s_inode_bitmap_number[i];
166                 inode_bitmap = sb->s_inode_bitmap[i];
167                 for (j = i; j > 0; j--) {
168                         sb->s_inode_bitmap_number[j] =
169                                 sb->s_inode_bitmap_number[j - 1];
170                         sb->s_inode_bitmap[j] =
171                                 sb->s_inode_bitmap[j - 1];
172                 }
173                 sb->s_inode_bitmap_number[0] = inode_bitmap_number;
174                 sb->s_inode_bitmap[0] = inode_bitmap;
175         } else {
176                 if (sb->s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED)
177                         sb->s_loaded_inode_bitmaps++;
178                 else
179                         ULCK_BUF(sb->s_inode_bitmap[EXT2_MAX_GROUP_LOADED - 1])
180                 for (j = sb->s_loaded_inode_bitmaps - 1; j > 0; j--) {
181                         sb->s_inode_bitmap_number[j] =
182                                 sb->s_inode_bitmap_number[j - 1];
183                         sb->s_inode_bitmap[j] =
184                                 sb->s_inode_bitmap[j - 1];
185                 }
186                 read_inode_bitmap (mp, block_group, 0);
187         }
188         return 0;
189 }
190
191
192 void
193 ext2_free_inode(struct inode *inode)
194 {
195         struct ext2_sb_info * sb;
196         struct buffer_head * bh;
197         struct buffer_head * bh2;
198         unsigned long block_group;
199         unsigned long bit;
200         int bitmap_nr;
201         struct ext2_group_desc * gdp;
202         struct ext2_super_block * es;
203
204         if (!inode)
205                 return;
206
207         if (inode->i_nlink) {
208                 kprintf ("ext2_free_inode: inode has nlink=%d\n",
209                         inode->i_nlink);
210                 return;
211         }
212
213         ext2_debug ("freeing inode %lu\n", inode->i_number);
214
215         sb = inode->i_e2fs;
216         lock_super (DEVVP(inode));
217         if (inode->i_number < EXT2_FIRST_INO(sb) ||
218             inode->i_number > sb->s_es->s_inodes_count) {
219                 kprintf ("free_inode reserved inode or nonexistent inode");
220                 unlock_super (DEVVP(inode));
221                 return;
222         }
223         es = sb->s_es;
224         block_group = (inode->i_number - 1) / EXT2_INODES_PER_GROUP(sb);
225         bit = (inode->i_number - 1) % EXT2_INODES_PER_GROUP(sb);
226         bitmap_nr = load_inode_bitmap (ITOV(inode)->v_mount, block_group);
227         bh = sb->s_inode_bitmap[bitmap_nr];
228         if (!clear_bit (bit, bh->b_data))       
229                 kprintf ( "ext2_free_inode:"
230                       "bit already cleared for inode %lu",
231                       (unsigned long)inode->i_number);
232         else {
233                 gdp = get_group_desc (ITOV(inode)->v_mount, block_group, &bh2);
234                 gdp->bg_free_inodes_count++;
235                 if (S_ISDIR(inode->i_mode)) 
236                         gdp->bg_used_dirs_count--;
237                 mark_buffer_dirty(bh2);
238                 es->s_free_inodes_count++;
239         }
240         mark_buffer_dirty(bh);
241 /*** XXX
242         if (sb->s_flags & MS_SYNCHRONOUS) {
243                 ll_rw_block (WRITE, 1, &bh);
244                 wait_on_buffer (bh);
245         }
246 ***/
247         sb->s_dirt = 1;
248         unlock_super (DEVVP(inode));
249 }
250
251 #if linux
252 /*
253  * This function increments the inode version number
254  *
255  * This may be used one day by the NFS server
256  */
257 static void
258 inc_inode_version(struct inode *inode, struct ext2_group_desc *gdp, int mode)
259 {
260         unsigned long inode_block;
261         struct buffer_head * bh;
262         struct ext2_inode * raw_inode;
263
264         inode_block = gdp->bg_inode_table + (((inode->i_number - 1) %
265                         EXT2_INODES_PER_GROUP(inode->i_sb)) /
266                         EXT2_INODES_PER_BLOCK(inode->i_sb));
267         bh = bread (inode->i_sb->s_dev, dbtob(inode_block), inode->i_sb->s_blocksize);
268         if (!bh) {
269                 kprintf ("inc_inode_version Cannot load inode table block - "
270                             "inode=%lu, inode_block=%lu\n",
271                             inode->i_number, inode_block);
272                 inode->u.ext2_i.i_version = 1;
273                 return;
274         }
275         raw_inode = ((struct ext2_inode *) bh->b_data) +
276                         (((inode->i_number - 1) %
277                         EXT2_INODES_PER_GROUP(inode->i_sb)) %
278                         EXT2_INODES_PER_BLOCK(inode->i_sb));
279         raw_inode->i_version++;
280         inode->u.ext2_i.i_version = raw_inode->i_version;
281         bdwrite (bh);
282 }
283
284 #endif /* linux */
285
286 /*
287  * There are two policies for allocating an inode.  If the new inode is
288  * a directory, then a forward search is made for a block group with both
289  * free space and a low directory-to-inode ratio; if that fails, then of
290  * the groups with above-average free space, that group with the fewest
291  * directories already is chosen.
292  *
293  * For other inodes, search forward from the parent directory\'s block
294  * group to find a free inode.
295  */
296 /*
297  * this functino has been reduced to the actual 'find the inode number' part
298  */
299 ino_t
300 ext2_new_inode(const struct inode *dir, int mode)
301 {
302         struct ext2_sb_info * sb;
303         struct buffer_head * bh;
304         struct buffer_head * bh2;
305         int i, j, avefreei;
306         int bitmap_nr;
307         struct ext2_group_desc * gdp;
308         struct ext2_group_desc * tmp;
309         struct ext2_super_block * es;
310
311         if (!dir)
312                 return 0;
313         sb = dir->i_e2fs;
314
315         lock_super (DEVVP(dir));
316         es = sb->s_es;
317 repeat:
318         gdp = NULL; i=0;
319
320         if (S_ISDIR(mode)) {
321                 avefreei = es->s_free_inodes_count /
322                         sb->s_groups_count;
323 /* I am not yet convinced that this next bit is necessary.
324                 i = dir->u.ext2_i.i_block_group;
325                 for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) {
326                         tmp = get_group_desc (sb, i, &bh2);
327                         if ((tmp->bg_used_dirs_count << 8) < 
328                             tmp->bg_free_inodes_count) {
329                                 gdp = tmp;
330                                 break;
331                         }
332                         else
333                         i = ++i % sb->u.ext2_sb.s_groups_count;
334                 }
335 */
336                 if (!gdp) {
337                         for (j = 0; j < sb->s_groups_count; j++) {
338                                 tmp = get_group_desc(ITOV(dir)->v_mount,j,&bh2);
339                                 if (tmp->bg_free_inodes_count &&
340                                         tmp->bg_free_inodes_count >= avefreei) {
341                                         if (!gdp || 
342                                             (tmp->bg_free_blocks_count >
343                                              gdp->bg_free_blocks_count)) {
344                                                 i = j;
345                                                 gdp = tmp;
346                                         }
347                                 }
348                         }
349                 }
350         }
351         else 
352         {
353                 /*
354                  * Try to place the inode in its parent directory
355                  */
356                 i = dir->i_block_group;
357                 tmp = get_group_desc (ITOV(dir)->v_mount, i, &bh2);
358                 if (tmp->bg_free_inodes_count)
359                         gdp = tmp;
360                 else
361                 {
362                         /*
363                          * Use a quadratic hash to find a group with a
364                          * free inode
365                          */
366                         for (j = 1; j < sb->s_groups_count; j <<= 1) {
367                                 i += j;
368                                 if (i >= sb->s_groups_count)
369                                         i -= sb->s_groups_count;
370                                 tmp = get_group_desc(ITOV(dir)->v_mount,i,&bh2);
371                                 if (tmp->bg_free_inodes_count) {
372                                         gdp = tmp;
373                                         break;
374                                 }
375                         }
376                 }
377                 if (!gdp) {
378                         /*
379                          * That failed: try linear search for a free inode
380                          */
381                         i = dir->i_block_group + 1;
382                         for (j = 2; j < sb->s_groups_count; j++) {
383                                 if (++i >= sb->s_groups_count)
384                                         i = 0;
385                                 tmp = get_group_desc(ITOV(dir)->v_mount,i,&bh2);
386                                 if (tmp->bg_free_inodes_count) {
387                                         gdp = tmp;
388                                         break;
389                                 }
390                         }
391                 }
392         }
393
394         if (!gdp) {
395                 unlock_super (DEVVP(dir));
396                 return 0;
397         }
398         bitmap_nr = load_inode_bitmap (ITOV(dir)->v_mount, i);
399         bh = sb->s_inode_bitmap[bitmap_nr];
400         if ((j = find_first_zero_bit ((unsigned long *) bh->b_data,
401                                       EXT2_INODES_PER_GROUP(sb))) <
402             EXT2_INODES_PER_GROUP(sb)) {
403                 if (set_bit (j, bh->b_data)) {
404                         kprintf ( "ext2_new_inode:"
405                                       "bit already set for inode %d", j);
406                         goto repeat;
407                 }
408 /* Linux now does the following:
409                 mark_buffer_dirty(bh);
410                 if (sb->s_flags & MS_SYNCHRONOUS) {
411                         ll_rw_block (WRITE, 1, &bh);
412                         wait_on_buffer (bh);
413                 }
414 */
415                 mark_buffer_dirty(bh);
416         } else {
417                 if (gdp->bg_free_inodes_count != 0) {
418                         kprintf ( "ext2_new_inode:"
419                                     "Free inodes count corrupted in group %d",
420                                     i);
421                         unlock_super (DEVVP(dir));
422                         return 0;
423                 }
424                 goto repeat;
425         }
426         j += i * EXT2_INODES_PER_GROUP(sb) + 1;
427         if (j < EXT2_FIRST_INO(sb) || j > es->s_inodes_count) {
428                 kprintf ( "ext2_new_inode:"
429                             "reserved inode or inode > inodes count - "
430                             "block_group = %d,inode=%d", i, j);
431                 unlock_super (DEVVP(dir));
432                 return 0;
433         }
434         gdp->bg_free_inodes_count--;
435         if (S_ISDIR(mode))
436                 gdp->bg_used_dirs_count++;
437         mark_buffer_dirty(bh2);
438         es->s_free_inodes_count--;
439         /* mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); */
440         sb->s_dirt = 1;
441         unlock_super (DEVVP(dir));
442         return j;
443 }
444
445 #ifdef unused
446 static unsigned long
447 ext2_count_free_inodes(struct mount *mp)
448 {
449 #ifdef EXT2FS_DEBUG
450         struct ext2_sb_info *sb = VFSTOEXT2(mp)->um_e2fs;
451         struct ext2_super_block * es;
452         unsigned long desc_count, bitmap_count, x;
453         int bitmap_nr;
454         struct ext2_group_desc * gdp;
455         int i;
456
457         lock_super (VFSTOEXT2(mp)->um_devvp);
458         es = sb->s_es;
459         desc_count = 0;
460         bitmap_count = 0;
461         gdp = NULL;
462         for (i = 0; i < sb->s_groups_count; i++) {
463                 gdp = get_group_desc (mp, i, NULL);
464                 desc_count += gdp->bg_free_inodes_count;
465                 bitmap_nr = load_inode_bitmap (mp, i);
466                 x = ext2_count_free (sb->s_inode_bitmap[bitmap_nr],
467                                      EXT2_INODES_PER_GROUP(sb) / 8);
468                 ext2_debug ("group %d: stored = %d, counted = %lu\n",
469                         i, gdp->bg_free_inodes_count, x);
470                 bitmap_count += x;
471         }
472         ext2_debug("stored = %lu, computed = %lu, %lu\n",
473                 es->s_free_inodes_count, desc_count, bitmap_count);
474         unlock_super (VFSTOEXT2(mp)->um_devvp);
475         return desc_count;
476 #else
477         return VFSTOEXT2(mp)->um_e2fsb->s_free_inodes_count;
478 #endif
479 }
480 #endif /* unused */
481
482 #ifdef LATER
483 void
484 ext2_check_inodes_bitmap(struct mount *mp)
485 {
486         struct ext2_super_block * es;
487         unsigned long desc_count, bitmap_count, x;
488         int bitmap_nr;
489         struct ext2_group_desc * gdp;
490         int i;
491
492         lock_super (sb);
493         es = sb->u.ext2_sb.s_es;
494         desc_count = 0;
495         bitmap_count = 0;
496         gdp = NULL;
497         for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) {
498                 gdp = get_group_desc (sb, i, NULL);
499                 desc_count += gdp->bg_free_inodes_count;
500                 bitmap_nr = load_inode_bitmap (sb, i);
501                 x = ext2_count_free (sb->u.ext2_sb.s_inode_bitmap[bitmap_nr],
502                                      EXT2_INODES_PER_GROUP(sb) / 8);
503                 if (gdp->bg_free_inodes_count != x)
504                         kprintf ( "ext2_check_inodes_bitmap:"
505                                     "Wrong free inodes count in group %d, "
506                                     "stored = %d, counted = %lu", i,
507                                     gdp->bg_free_inodes_count, x);
508                 bitmap_count += x;
509         }
510         if (es->s_free_inodes_count != bitmap_count)
511                 kprintf ( "ext2_check_inodes_bitmap:"
512                             "Wrong free inodes count in super block, "
513                             "stored = %lu, counted = %lu",
514                             (unsigned long) es->s_free_inodes_count, bitmap_count);
515         unlock_super (sb);
516 }
517 #endif