EXT2FS: support variable inode size.
[dragonfly.git] / sys / vfs / gnu / ext2fs / ext2_linux_ialloc.c
CommitLineData
984263bc
MD
1/*
2 * modified for Lites 1.1
3 *
4 * Aug 1995, Godmar Back (gback@cs.utah.edu)
5 * University of Utah, Department of Computer Science
6 *
7 * $FreeBSD: src/sys/gnu/ext2fs/ext2_linux_ialloc.c,v 1.13.2.2 2001/08/14 18:03:19 gallatin Exp $
7aa379b3 8 * $DragonFly: src/sys/vfs/gnu/ext2fs/ext2_linux_ialloc.c,v 1.12 2007/09/23 04:09:55 yanyh Exp $
984263bc
MD
9 */
10/*
11 * linux/fs/ext2/ialloc.c
12 *
13 * Copyright (C) 1992, 1993, 1994, 1995
14 * Remy Card (card@masi.ibp.fr)
15 * Laboratoire MASI - Institut Blaise Pascal
16 * Universite Pierre et Marie Curie (Paris VI)
17 *
18 * BSD ufs-inspired inode and directory allocation by
19 * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
20 */
21
22/*
23 * The free inodes are managed by bitmaps. A file system contains several
24 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
25 * block for inodes, N blocks for the inode table and data blocks.
26 *
27 * The file system contains group descriptors which are located after the
28 * super block. Each descriptor contains the number of the bitmap block and
29 * the free blocks count in the block. The descriptors are loaded in memory
30 * when a file system is mounted (see ext2_read_super).
31 */
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/buf.h>
36#include <sys/proc.h>
37#include <sys/mount.h>
38#include <sys/vnode.h>
39
1f1db49f
MD
40#include "quota.h"
41#include "inode.h"
42#include "ext2mount.h"
1f2de5d4
MD
43#include "ext2_extern.h"
44#include "ext2_fs.h"
45#include "ext2_fs_sb.h"
46#include "fs.h"
984263bc 47#include <sys/stat.h>
7b95be2a 48#include <sys/buf2.h>
61670a01 49#include <sys/thread2.h>
984263bc
MD
50
51#ifdef __i386__
1f2de5d4 52#include "i386-bitops.h"
984263bc 53#else
7aa379b3 54#include "ext2_bitops.h"
984263bc
MD
55#endif
56
57/* this is supposed to mark a buffer dirty on ready for delayed writing
58 */
b1ce5639
SW
59void
60mark_buffer_dirty(struct buf *bh)
984263bc 61{
165dba55 62 crit_enter();
984263bc 63 bh->b_flags |= B_DIRTY;
165dba55 64 crit_exit();
984263bc
MD
65}
66
b1ce5639
SW
67struct ext2_group_desc *
68get_group_desc(struct mount * mp, unsigned int block_group,
69 struct buffer_head **bh)
984263bc 70{
1f1db49f 71 struct ext2_sb_info *sb = VFSTOEXT2(mp)->um_e2fs;
984263bc
MD
72 unsigned long group_desc;
73 unsigned long desc;
74 struct ext2_group_desc * gdp;
75
76 if (block_group >= sb->s_groups_count)
77 panic ("get_group_desc: "
78 "block_group >= groups_count - "
79 "block_group = %d, groups_count = %lu",
80 block_group, sb->s_groups_count);
81
82 group_desc = block_group / EXT2_DESC_PER_BLOCK(sb);
83 desc = block_group % EXT2_DESC_PER_BLOCK(sb);
84 if (!sb->s_group_desc[group_desc])
85 panic ( "get_group_desc:"
86 "Group descriptor not loaded - "
87 "block_group = %d, group_desc = %lu, desc = %lu",
88 block_group, group_desc, desc);
89 gdp = (struct ext2_group_desc *)
90 sb->s_group_desc[group_desc]->b_data;
91 if (bh)
92 *bh = sb->s_group_desc[group_desc];
93 return gdp + desc;
94}
95
b1ce5639
SW
96static void
97read_inode_bitmap(struct mount *mp, unsigned long block_group,
98 unsigned int bitmap_nr)
984263bc 99{
1f1db49f 100 struct ext2_sb_info *sb = VFSTOEXT2(mp)->um_e2fs;
984263bc
MD
101 struct ext2_group_desc * gdp;
102 struct buffer_head * bh;
103 int error;
104
105 gdp = get_group_desc (mp, block_group, NULL);
1f1db49f 106 if ((error = bread (VFSTOEXT2(mp)->um_devvp,
54078292 107 fsbtodoff(sb, gdp->bg_inode_bitmap),
7b95be2a 108 sb->s_blocksize, &bh)) != 0)
984263bc
MD
109 panic ( "read_inode_bitmap:"
110 "Cannot read inode bitmap - "
111 "block_group = %lu, inode_bitmap = %lu",
112 block_group, (unsigned long) gdp->bg_inode_bitmap);
113 sb->s_inode_bitmap_number[bitmap_nr] = block_group;
114 sb->s_inode_bitmap[bitmap_nr] = bh;
115 LCK_BUF(bh)
116}
117
118/*
119 * load_inode_bitmap loads the inode bitmap for a blocks group
120 *
121 * It maintains a cache for the last bitmaps loaded. This cache is managed
122 * with a LRU algorithm.
123 *
124 * Notes:
125 * 1/ There is one cache per mounted file system.
126 * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups,
127 * this function reads the bitmap without maintaining a LRU cache.
128 */
b1ce5639
SW
129static int
130load_inode_bitmap(struct mount *mp, unsigned int block_group)
984263bc 131{
1f1db49f 132 struct ext2_sb_info *sb = VFSTOEXT2(mp)->um_e2fs;
984263bc
MD
133 int i, j;
134 unsigned long inode_bitmap_number;
135 struct buffer_head * inode_bitmap;
136
137 if (block_group >= sb->s_groups_count)
138 panic ("load_inode_bitmap:"
139 "block_group >= groups_count - "
140 "block_group = %d, groups_count = %lu",
141 block_group, sb->s_groups_count);
142 if (sb->s_loaded_inode_bitmaps > 0 &&
143 sb->s_inode_bitmap_number[0] == block_group)
144 return 0;
145 if (sb->s_groups_count <= EXT2_MAX_GROUP_LOADED) {
146 if (sb->s_inode_bitmap[block_group]) {
147 if (sb->s_inode_bitmap_number[block_group] !=
148 block_group)
149 panic ( "load_inode_bitmap:"
150 "block_group != inode_bitmap_number");
151 else
152 return block_group;
153 } else {
154 read_inode_bitmap (mp, block_group, block_group);
155 return block_group;
156 }
157 }
158
159 for (i = 0; i < sb->s_loaded_inode_bitmaps &&
160 sb->s_inode_bitmap_number[i] != block_group;
161 i++)
162 ;
163 if (i < sb->s_loaded_inode_bitmaps &&
164 sb->s_inode_bitmap_number[i] == block_group) {
165 inode_bitmap_number = sb->s_inode_bitmap_number[i];
166 inode_bitmap = sb->s_inode_bitmap[i];
167 for (j = i; j > 0; j--) {
168 sb->s_inode_bitmap_number[j] =
169 sb->s_inode_bitmap_number[j - 1];
170 sb->s_inode_bitmap[j] =
171 sb->s_inode_bitmap[j - 1];
172 }
173 sb->s_inode_bitmap_number[0] = inode_bitmap_number;
174 sb->s_inode_bitmap[0] = inode_bitmap;
175 } else {
176 if (sb->s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED)
177 sb->s_loaded_inode_bitmaps++;
178 else
179 ULCK_BUF(sb->s_inode_bitmap[EXT2_MAX_GROUP_LOADED - 1])
180 for (j = sb->s_loaded_inode_bitmaps - 1; j > 0; j--) {
181 sb->s_inode_bitmap_number[j] =
182 sb->s_inode_bitmap_number[j - 1];
183 sb->s_inode_bitmap[j] =
184 sb->s_inode_bitmap[j - 1];
185 }
186 read_inode_bitmap (mp, block_group, 0);
187 }
188 return 0;
189}
190
191
b1ce5639
SW
192void
193ext2_free_inode(struct inode *inode)
984263bc
MD
194{
195 struct ext2_sb_info * sb;
196 struct buffer_head * bh;
197 struct buffer_head * bh2;
198 unsigned long block_group;
199 unsigned long bit;
200 int bitmap_nr;
201 struct ext2_group_desc * gdp;
202 struct ext2_super_block * es;
203
204 if (!inode)
205 return;
206
207 if (inode->i_nlink) {
086c1d7e 208 kprintf ("ext2_free_inode: inode has nlink=%d\n",
984263bc
MD
209 inode->i_nlink);
210 return;
211 }
212
213 ext2_debug ("freeing inode %lu\n", inode->i_number);
214
215 sb = inode->i_e2fs;
216 lock_super (DEVVP(inode));
46932790 217 if (inode->i_number < EXT2_FIRST_INO(sb) ||
984263bc 218 inode->i_number > sb->s_es->s_inodes_count) {
086c1d7e 219 kprintf ("free_inode reserved inode or nonexistent inode");
984263bc
MD
220 unlock_super (DEVVP(inode));
221 return;
222 }
223 es = sb->s_es;
224 block_group = (inode->i_number - 1) / EXT2_INODES_PER_GROUP(sb);
225 bit = (inode->i_number - 1) % EXT2_INODES_PER_GROUP(sb);
226 bitmap_nr = load_inode_bitmap (ITOV(inode)->v_mount, block_group);
227 bh = sb->s_inode_bitmap[bitmap_nr];
228 if (!clear_bit (bit, bh->b_data))
086c1d7e 229 kprintf ( "ext2_free_inode:"
984263bc
MD
230 "bit already cleared for inode %lu",
231 (unsigned long)inode->i_number);
232 else {
233 gdp = get_group_desc (ITOV(inode)->v_mount, block_group, &bh2);
234 gdp->bg_free_inodes_count++;
235 if (S_ISDIR(inode->i_mode))
236 gdp->bg_used_dirs_count--;
237 mark_buffer_dirty(bh2);
238 es->s_free_inodes_count++;
239 }
240 mark_buffer_dirty(bh);
241/*** XXX
242 if (sb->s_flags & MS_SYNCHRONOUS) {
243 ll_rw_block (WRITE, 1, &bh);
244 wait_on_buffer (bh);
245 }
246***/
247 sb->s_dirt = 1;
248 unlock_super (DEVVP(inode));
249}
250
251#if linux
252/*
253 * This function increments the inode version number
254 *
255 * This may be used one day by the NFS server
256 */
b1ce5639
SW
257static void
258inc_inode_version(struct inode *inode, struct ext2_group_desc *gdp, int mode)
984263bc
MD
259{
260 unsigned long inode_block;
261 struct buffer_head * bh;
262 struct ext2_inode * raw_inode;
263
264 inode_block = gdp->bg_inode_table + (((inode->i_number - 1) %
265 EXT2_INODES_PER_GROUP(inode->i_sb)) /
266 EXT2_INODES_PER_BLOCK(inode->i_sb));
54078292 267 bh = bread (inode->i_sb->s_dev, dbtob(inode_block), inode->i_sb->s_blocksize);
984263bc 268 if (!bh) {
086c1d7e 269 kprintf ("inc_inode_version Cannot load inode table block - "
984263bc
MD
270 "inode=%lu, inode_block=%lu\n",
271 inode->i_number, inode_block);
272 inode->u.ext2_i.i_version = 1;
273 return;
274 }
275 raw_inode = ((struct ext2_inode *) bh->b_data) +
276 (((inode->i_number - 1) %
277 EXT2_INODES_PER_GROUP(inode->i_sb)) %
278 EXT2_INODES_PER_BLOCK(inode->i_sb));
279 raw_inode->i_version++;
280 inode->u.ext2_i.i_version = raw_inode->i_version;
281 bdwrite (bh);
282}
283
284#endif /* linux */
285
286/*
287 * There are two policies for allocating an inode. If the new inode is
288 * a directory, then a forward search is made for a block group with both
289 * free space and a low directory-to-inode ratio; if that fails, then of
290 * the groups with above-average free space, that group with the fewest
291 * directories already is chosen.
292 *
293 * For other inodes, search forward from the parent directory's block
294 * group to find a free inode.
295 */
296/*
297 * this function has been reduced to the actual 'find the inode number' part
298 */
b1ce5639
SW
299ino_t
300ext2_new_inode(const struct inode *dir, int mode)
984263bc
MD
301{
302 struct ext2_sb_info * sb;
303 struct buffer_head * bh;
304 struct buffer_head * bh2;
305 int i, j, avefreei;
306 int bitmap_nr;
307 struct ext2_group_desc * gdp;
308 struct ext2_group_desc * tmp;
309 struct ext2_super_block * es;
310
311 if (!dir)
312 return 0;
313 sb = dir->i_e2fs;
314
315 lock_super (DEVVP(dir));
316 es = sb->s_es;
317repeat:
318 gdp = NULL; i=0;
319
320 if (S_ISDIR(mode)) {
321 avefreei = es->s_free_inodes_count /
322 sb->s_groups_count;
323/* I am not yet convinced that this next bit is necessary.
324 i = dir->u.ext2_i.i_block_group;
325 for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) {
326 tmp = get_group_desc (sb, i, &bh2);
327 if ((tmp->bg_used_dirs_count << 8) <
328 tmp->bg_free_inodes_count) {
329 gdp = tmp;
330 break;
331 }
332 else
333 i = ++i % sb->u.ext2_sb.s_groups_count;
334 }
335*/
336 if (!gdp) {
337 for (j = 0; j < sb->s_groups_count; j++) {
338 tmp = get_group_desc(ITOV(dir)->v_mount,j,&bh2);
339 if (tmp->bg_free_inodes_count &&
340 tmp->bg_free_inodes_count >= avefreei) {
341 if (!gdp ||
342 (tmp->bg_free_blocks_count >
343 gdp->bg_free_blocks_count)) {
344 i = j;
345 gdp = tmp;
346 }
347 }
348 }
349 }
350 }
351 else
352 {
353 /*
354 * Try to place the inode in its parent directory
355 */
356 i = dir->i_block_group;
357 tmp = get_group_desc (ITOV(dir)->v_mount, i, &bh2);
358 if (tmp->bg_free_inodes_count)
359 gdp = tmp;
360 else
361 {
362 /*
363 * Use a quadratic hash to find a group with a
364 * free inode
365 */
366 for (j = 1; j < sb->s_groups_count; j <<= 1) {
367 i += j;
368 if (i >= sb->s_groups_count)
369 i -= sb->s_groups_count;
370 tmp = get_group_desc(ITOV(dir)->v_mount,i,&bh2);
371 if (tmp->bg_free_inodes_count) {
372 gdp = tmp;
373 break;
374 }
375 }
376 }
377 if (!gdp) {
378 /*
379 * That failed: try linear search for a free inode
380 */
381 i = dir->i_block_group + 1;
382 for (j = 2; j < sb->s_groups_count; j++) {
383 if (++i >= sb->s_groups_count)
384 i = 0;
385 tmp = get_group_desc(ITOV(dir)->v_mount,i,&bh2);
386 if (tmp->bg_free_inodes_count) {
387 gdp = tmp;
388 break;
389 }
390 }
391 }
392 }
393
394 if (!gdp) {
395 unlock_super (DEVVP(dir));
396 return 0;
397 }
398 bitmap_nr = load_inode_bitmap (ITOV(dir)->v_mount, i);
399 bh = sb->s_inode_bitmap[bitmap_nr];
400 if ((j = find_first_zero_bit ((unsigned long *) bh->b_data,
401 EXT2_INODES_PER_GROUP(sb))) <
402 EXT2_INODES_PER_GROUP(sb)) {
403 if (set_bit (j, bh->b_data)) {
086c1d7e 404 kprintf ( "ext2_new_inode:"
984263bc
MD
405 "bit already set for inode %d", j);
406 goto repeat;
407 }
408/* Linux now does the following:
409 mark_buffer_dirty(bh);
410 if (sb->s_flags & MS_SYNCHRONOUS) {
411 ll_rw_block (WRITE, 1, &bh);
412 wait_on_buffer (bh);
413 }
414*/
415 mark_buffer_dirty(bh);
416 } else {
417 if (gdp->bg_free_inodes_count != 0) {
086c1d7e 418 kprintf ( "ext2_new_inode:"
984263bc
MD
419 "Free inodes count corrupted in group %d",
420 i);
421 unlock_super (DEVVP(dir));
422 return 0;
423 }
424 goto repeat;
425 }
426 j += i * EXT2_INODES_PER_GROUP(sb) + 1;
46932790 427 if (j < EXT2_FIRST_INO(sb) || j > es->s_inodes_count) {
086c1d7e 428 kprintf ( "ext2_new_inode:"
984263bc
MD
429 "reserved inode or inode > inodes count - "
430 "block_group = %d,inode=%d", i, j);
431 unlock_super (DEVVP(dir));
432 return 0;
433 }
434 gdp->bg_free_inodes_count--;
435 if (S_ISDIR(mode))
436 gdp->bg_used_dirs_count++;
437 mark_buffer_dirty(bh2);
438 es->s_free_inodes_count--;
439 /* mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); */
440 sb->s_dirt = 1;
441 unlock_super (DEVVP(dir));
442 return j;
443}
444
445#ifdef unused
b1ce5639
SW
446static unsigned long
447ext2_count_free_inodes(struct mount *mp)
984263bc
MD
448{
449#ifdef EXT2FS_DEBUG
1f1db49f 450 struct ext2_sb_info *sb = VFSTOEXT2(mp)->um_e2fs;
984263bc
MD
451 struct ext2_super_block * es;
452 unsigned long desc_count, bitmap_count, x;
453 int bitmap_nr;
454 struct ext2_group_desc * gdp;
455 int i;
456
1f1db49f 457 lock_super (VFSTOEXT2(mp)->um_devvp);
984263bc
MD
458 es = sb->s_es;
459 desc_count = 0;
460 bitmap_count = 0;
461 gdp = NULL;
462 for (i = 0; i < sb->s_groups_count; i++) {
463 gdp = get_group_desc (mp, i, NULL);
464 desc_count += gdp->bg_free_inodes_count;
465 bitmap_nr = load_inode_bitmap (mp, i);
466 x = ext2_count_free (sb->s_inode_bitmap[bitmap_nr],
467 EXT2_INODES_PER_GROUP(sb) / 8);
468 ext2_debug ("group %d: stored = %d, counted = %lu\n",
469 i, gdp->bg_free_inodes_count, x);
470 bitmap_count += x;
471 }
472 ext2_debug("stored = %lu, computed = %lu, %lu\n",
473 es->s_free_inodes_count, desc_count, bitmap_count);
1f1db49f 474 unlock_super (VFSTOEXT2(mp)->um_devvp);
984263bc
MD
475 return desc_count;
476#else
1f1db49f 477 return VFSTOEXT2(mp)->um_e2fsb->s_free_inodes_count;
984263bc
MD
478#endif
479}
480#endif /* unused */
481
482#ifdef LATER
b1ce5639
SW
483void
484ext2_check_inodes_bitmap(struct mount *mp)
984263bc
MD
485{
486 struct ext2_super_block * es;
487 unsigned long desc_count, bitmap_count, x;
488 int bitmap_nr;
489 struct ext2_group_desc * gdp;
490 int i;
491
492 lock_super (sb);
493 es = sb->u.ext2_sb.s_es;
494 desc_count = 0;
495 bitmap_count = 0;
496 gdp = NULL;
497 for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) {
498 gdp = get_group_desc (sb, i, NULL);
499 desc_count += gdp->bg_free_inodes_count;
500 bitmap_nr = load_inode_bitmap (sb, i);
501 x = ext2_count_free (sb->u.ext2_sb.s_inode_bitmap[bitmap_nr],
502 EXT2_INODES_PER_GROUP(sb) / 8);
503 if (gdp->bg_free_inodes_count != x)
086c1d7e 504 kprintf ( "ext2_check_inodes_bitmap:"
984263bc
MD
505 "Wrong free inodes count in group %d, "
506 "stored = %d, counted = %lu", i,
507 gdp->bg_free_inodes_count, x);
508 bitmap_count += x;
509 }
510 if (es->s_free_inodes_count != bitmap_count)
086c1d7e 511 kprintf ( "ext2_check_inodes_bitmap:"
984263bc
MD
512 "Wrong free inodes count in super block, "
513 "stored = %lu, counted = %lu",
514 (unsigned long) es->s_free_inodes_count, bitmap_count);
515 unlock_super (sb);
516}
517#endif