| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1982, 1986, 1989, 1993 | |
| 3 | * The Regents of the University of California. All rights reserved. | |
| 4 | * | |
| 5 | * Redistribution and use in source and binary forms, with or without | |
| 6 | * modification, are permitted provided that the following conditions | |
| 7 | * are met: | |
| 8 | * 1. Redistributions of source code must retain the above copyright | |
| 9 | * notice, this list of conditions and the following disclaimer. | |
| 10 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 11 | * notice, this list of conditions and the following disclaimer in the | |
| 12 | * documentation and/or other materials provided with the distribution. | |
| 13 | * 3. All advertising materials mentioning features or use of this software | |
| 14 | * must display the following acknowledgement: | |
| 15 | * This product includes software developed by the University of | |
| 16 | * California, Berkeley and its contributors. | |
| 17 | * 4. Neither the name of the University nor the names of its contributors | |
| 18 | * may be used to endorse or promote products derived from this software | |
| 19 | * without specific prior written permission. | |
| 20 | * | |
| 21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 31 | * SUCH DAMAGE. | |
| 32 | * | |
| 33 | * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 | |
| 34 | * $FreeBSD: src/sys/ufs/ffs/ffs_alloc.c,v 1.64.2.2 2001/09/21 19:15:21 dillon Exp $ | |
| 72b70bdb | 35 | * $DragonFly: src/sys/vfs/ufs/ffs_alloc.c,v 1.27 2006/12/29 17:10:20 swildner Exp $ |
| 984263bc MD |
36 | */ |
| 37 | ||
| 38 | #include "opt_quota.h" | |
| 39 | ||
| 40 | #include <sys/param.h> | |
| 41 | #include <sys/systm.h> | |
| 42 | #include <sys/buf.h> | |
| e0fb398b | 43 | #include <sys/buf2.h> |
| 984263bc MD |
44 | #include <sys/conf.h> |
| 45 | #include <sys/proc.h> | |
| 46 | #include <sys/vnode.h> | |
| 47 | #include <sys/mount.h> | |
| 48 | #include <sys/kernel.h> | |
| 49 | #include <sys/sysctl.h> | |
| 50 | #include <sys/syslog.h> | |
| 51 | ||
| e0fb398b | 52 | #include <sys/taskqueue.h> |
| f91a71dd JS |
53 | #include <machine/inttypes.h> |
| 54 | ||
| 1f2de5d4 MD |
55 | #include "quota.h" |
| 56 | #include "inode.h" | |
| 57 | #include "ufs_extern.h" | |
| 58 | #include "ufsmount.h" | |
| 984263bc | 59 | |
| 1f2de5d4 MD |
60 | #include "fs.h" |
| 61 | #include "ffs_extern.h" | |
| 984263bc | 62 | |
| a6ee311a RG |
63 | typedef ufs_daddr_t allocfcn_t (struct inode *ip, int cg, ufs_daddr_t bpref, |
| 64 | int size); | |
| 984263bc | 65 | |
| a6ee311a | 66 | static ufs_daddr_t ffs_alloccg (struct inode *, int, ufs_daddr_t, int); |
| 984263bc | 67 | static ufs_daddr_t |
| a6ee311a | 68 | ffs_alloccgblk (struct inode *, struct buf *, ufs_daddr_t); |
| e0fb398b T |
69 | static void ffs_blkfree_cg(struct fs *, struct vnode *, cdev_t , ino_t, |
| 70 | uint32_t , ufs_daddr_t, long ); | |
| 984263bc | 71 | #ifdef DIAGNOSTIC |
| a6ee311a | 72 | static int ffs_checkblk (struct inode *, ufs_daddr_t, long); |
| 984263bc | 73 | #endif |
| a6ee311a RG |
74 | static void ffs_clusteracct (struct fs *, struct cg *, ufs_daddr_t, |
| 75 | int); | |
| 76 | static ufs_daddr_t ffs_clusteralloc (struct inode *, int, ufs_daddr_t, | |
| 77 | int); | |
| 78 | static ino_t ffs_dirpref (struct inode *); | |
| 79 | static ufs_daddr_t ffs_fragextend (struct inode *, int, long, int, int); | |
| f719c866 | 80 | static void ffs_fserr (struct fs *, uint, char *); |
| 984263bc | 81 | static u_long ffs_hashalloc |
| a6ee311a RG |
82 | (struct inode *, int, long, int, allocfcn_t *); |
| 83 | static ino_t ffs_nodealloccg (struct inode *, int, ufs_daddr_t, int); | |
| 84 | static ufs_daddr_t ffs_mapsearch (struct fs *, struct cg *, ufs_daddr_t, | |
| 85 | int); | |
| 984263bc MD |
86 | |
| 87 | /* | |
| f719c866 | 88 | * Allocate a block in the filesystem. |
| 984263bc MD |
89 | * |
| 90 | * The size of the requested block is given, which must be some | |
| 91 | * multiple of fs_fsize and <= fs_bsize. | |
| 92 | * A preference may be optionally specified. If a preference is given | |
| 93 | * the following hierarchy is used to allocate a block: | |
| 94 | * 1) allocate the requested block. | |
| 95 | * 2) allocate a rotationally optimal block in the same cylinder. | |
| 96 | * 3) allocate a block in the same cylinder group. | |
| 97 | * 4) quadradically rehash into other cylinder groups, until an | |
| 98 | * available block is located. | |
| 99 | * If no block preference is given the following heirarchy is used | |
| 100 | * to allocate a block: | |
| 101 | * 1) allocate a block in the cylinder group that contains the | |
| 102 | * inode for the file. | |
| 103 | * 2) quadradically rehash into other cylinder groups, until an | |
| 104 | * available block is located. | |
| 105 | */ | |
| 106 | int | |
| 0973c589 CP |
107 | ffs_alloc(struct inode *ip, ufs_daddr_t lbn, ufs_daddr_t bpref, int size, |
| 108 | struct ucred *cred, ufs_daddr_t *bnp) | |
| 984263bc | 109 | { |
| 3ff2135f | 110 | struct fs *fs; |
| 984263bc MD |
111 | ufs_daddr_t bno; |
| 112 | int cg; | |
| 113 | #ifdef QUOTA | |
| 114 | int error; | |
| 115 | #endif | |
| 116 | ||
| 117 | *bnp = 0; | |
| 118 | fs = ip->i_fs; | |
| 119 | #ifdef DIAGNOSTIC | |
| f719c866 | 120 | if ((uint)size > fs->fs_bsize || fragoff(fs, size) != 0) { |
| 086c1d7e | 121 | kprintf("dev = %s, bsize = %ld, size = %d, fs = %s\n", |
| 984263bc MD |
122 | devtoname(ip->i_dev), (long)fs->fs_bsize, size, |
| 123 | fs->fs_fsmnt); | |
| 124 | panic("ffs_alloc: bad size"); | |
| 125 | } | |
| 126 | if (cred == NOCRED) | |
| 127 | panic("ffs_alloc: missing credential"); | |
| 128 | #endif /* DIAGNOSTIC */ | |
| 129 | if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) | |
| 130 | goto nospace; | |
| 131 | if (cred->cr_uid != 0 && | |
| 132 | freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) | |
| 133 | goto nospace; | |
| 134 | #ifdef QUOTA | |
| 50e58362 | 135 | error = ufs_chkdq(ip, (long)btodb(size), cred, 0); |
| 984263bc MD |
136 | if (error) |
| 137 | return (error); | |
| 138 | #endif | |
| 139 | if (bpref >= fs->fs_size) | |
| 140 | bpref = 0; | |
| 141 | if (bpref == 0) | |
| 142 | cg = ino_to_cg(fs, ip->i_number); | |
| 143 | else | |
| 144 | cg = dtog(fs, bpref); | |
| 145 | bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, | |
| 146 | ffs_alloccg); | |
| 147 | if (bno > 0) { | |
| 148 | ip->i_blocks += btodb(size); | |
| 149 | ip->i_flag |= IN_CHANGE | IN_UPDATE; | |
| 150 | *bnp = bno; | |
| 151 | return (0); | |
| 152 | } | |
| 153 | #ifdef QUOTA | |
| 154 | /* | |
| 155 | * Restore user's disk quota because allocation failed. | |
| 156 | */ | |
| 50e58362 | 157 | (void) ufs_chkdq(ip, (long)-btodb(size), cred, FORCE); |
| 984263bc MD |
158 | #endif |
| 159 | nospace: | |
| f719c866 DR |
160 | ffs_fserr(fs, cred->cr_uid, "filesystem full"); |
| 161 | uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt); | |
| 984263bc MD |
162 | return (ENOSPC); |
| 163 | } | |
| 164 | ||
| 165 | /* | |
| 166 | * Reallocate a fragment to a bigger size | |
| 167 | * | |
| 168 | * The number and size of the old block is given, and a preference | |
| 169 | * and new size is also specified. The allocator attempts to extend | |
| 170 | * the original block. Failing that, the regular block allocator is | |
| 171 | * invoked to get an appropriate block. | |
| 172 | */ | |
| 173 | int | |
| 0973c589 CP |
174 | ffs_realloccg(struct inode *ip, ufs_daddr_t lbprev, ufs_daddr_t bpref, |
| 175 | int osize, int nsize, struct ucred *cred, struct buf **bpp) | |
| 984263bc | 176 | { |
| 3ff2135f | 177 | struct fs *fs; |
| 984263bc MD |
178 | struct buf *bp; |
| 179 | int cg, request, error; | |
| 180 | ufs_daddr_t bprev, bno; | |
| 181 | ||
| 182 | *bpp = 0; | |
| 183 | fs = ip->i_fs; | |
| 184 | #ifdef DIAGNOSTIC | |
| f719c866 DR |
185 | if ((uint)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || |
| 186 | (uint)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { | |
| 086c1d7e | 187 | kprintf( |
| 984263bc MD |
188 | "dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n", |
| 189 | devtoname(ip->i_dev), (long)fs->fs_bsize, osize, | |
| 190 | nsize, fs->fs_fsmnt); | |
| 191 | panic("ffs_realloccg: bad size"); | |
| 192 | } | |
| 193 | if (cred == NOCRED) | |
| 194 | panic("ffs_realloccg: missing credential"); | |
| 195 | #endif /* DIAGNOSTIC */ | |
| 196 | if (cred->cr_uid != 0 && | |
| 197 | freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) | |
| 198 | goto nospace; | |
| 199 | if ((bprev = ip->i_db[lbprev]) == 0) { | |
| 086c1d7e | 200 | kprintf("dev = %s, bsize = %ld, bprev = %ld, fs = %s\n", |
| 984263bc MD |
201 | devtoname(ip->i_dev), (long)fs->fs_bsize, (long)bprev, |
| 202 | fs->fs_fsmnt); | |
| 203 | panic("ffs_realloccg: bad bprev"); | |
| 204 | } | |
| 205 | /* | |
| 206 | * Allocate the extra space in the buffer. | |
| 207 | */ | |
| 54078292 | 208 | error = bread(ITOV(ip), lblktodoff(fs, lbprev), osize, &bp); |
| 984263bc MD |
209 | if (error) { |
| 210 | brelse(bp); | |
| 211 | return (error); | |
| 212 | } | |
| 213 | ||
| 54078292 | 214 | if(bp->b_bio2.bio_offset == NOOFFSET) { |
| 984263bc MD |
215 | if( lbprev >= NDADDR) |
| 216 | panic("ffs_realloccg: lbprev out of range"); | |
| 54078292 | 217 | bp->b_bio2.bio_offset = fsbtodoff(fs, bprev); |
| 984263bc MD |
218 | } |
| 219 | ||
| 220 | #ifdef QUOTA | |
| 50e58362 | 221 | error = ufs_chkdq(ip, (long)btodb(nsize - osize), cred, 0); |
| 984263bc MD |
222 | if (error) { |
| 223 | brelse(bp); | |
| 224 | return (error); | |
| 225 | } | |
| 226 | #endif | |
| 227 | /* | |
| 228 | * Check for extension in the existing location. | |
| 229 | */ | |
| 230 | cg = dtog(fs, bprev); | |
| 231 | bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize); | |
| 232 | if (bno) { | |
| 54078292 | 233 | if (bp->b_bio2.bio_offset != fsbtodoff(fs, bno)) |
| 984263bc MD |
234 | panic("ffs_realloccg: bad blockno"); |
| 235 | ip->i_blocks += btodb(nsize - osize); | |
| 236 | ip->i_flag |= IN_CHANGE | IN_UPDATE; | |
| 237 | allocbuf(bp, nsize); | |
| f719c866 | 238 | bzero((char *)bp->b_data + osize, (uint)nsize - osize); |
| 984263bc MD |
239 | *bpp = bp; |
| 240 | return (0); | |
| 241 | } | |
| 242 | /* | |
| 243 | * Allocate a new disk location. | |
| 244 | */ | |
| 245 | if (bpref >= fs->fs_size) | |
| 246 | bpref = 0; | |
| 247 | switch ((int)fs->fs_optim) { | |
| 248 | case FS_OPTSPACE: | |
| 249 | /* | |
| 250 | * Allocate an exact sized fragment. Although this makes | |
| 251 | * best use of space, we will waste time relocating it if | |
| 252 | * the file continues to grow. If the fragmentation is | |
| 253 | * less than half of the minimum free reserve, we choose | |
| 254 | * to begin optimizing for time. | |
| 255 | */ | |
| 256 | request = nsize; | |
| 257 | if (fs->fs_minfree <= 5 || | |
| 258 | fs->fs_cstotal.cs_nffree > | |
| 259 | (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100)) | |
| 260 | break; | |
| 261 | log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", | |
| 262 | fs->fs_fsmnt); | |
| 263 | fs->fs_optim = FS_OPTTIME; | |
| 264 | break; | |
| 265 | case FS_OPTTIME: | |
| 266 | /* | |
| 267 | * At this point we have discovered a file that is trying to | |
| 268 | * grow a small fragment to a larger fragment. To save time, | |
| 269 | * we allocate a full sized block, then free the unused portion. | |
| 270 | * If the file continues to grow, the `ffs_fragextend' call | |
| 271 | * above will be able to grow it in place without further | |
| 272 | * copying. If aberrant programs cause disk fragmentation to | |
| 273 | * grow within 2% of the free reserve, we choose to begin | |
| 274 | * optimizing for space. | |
| 275 | */ | |
| 276 | request = fs->fs_bsize; | |
| 277 | if (fs->fs_cstotal.cs_nffree < | |
| 278 | (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100) | |
| 279 | break; | |
| 280 | log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", | |
| 281 | fs->fs_fsmnt); | |
| 282 | fs->fs_optim = FS_OPTSPACE; | |
| 283 | break; | |
| 284 | default: | |
| 086c1d7e | 285 | kprintf("dev = %s, optim = %ld, fs = %s\n", |
| 984263bc MD |
286 | devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt); |
| 287 | panic("ffs_realloccg: bad optim"); | |
| 288 | /* NOTREACHED */ | |
| 289 | } | |
| 290 | bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, | |
| 291 | ffs_alloccg); | |
| 292 | if (bno > 0) { | |
| 54078292 | 293 | bp->b_bio2.bio_offset = fsbtodoff(fs, bno); |
| 984263bc MD |
294 | if (!DOINGSOFTDEP(ITOV(ip))) |
| 295 | ffs_blkfree(ip, bprev, (long)osize); | |
| 296 | if (nsize < request) | |
| 297 | ffs_blkfree(ip, bno + numfrags(fs, nsize), | |
| 298 | (long)(request - nsize)); | |
| 299 | ip->i_blocks += btodb(nsize - osize); | |
| 300 | ip->i_flag |= IN_CHANGE | IN_UPDATE; | |
| 301 | allocbuf(bp, nsize); | |
| f719c866 | 302 | bzero((char *)bp->b_data + osize, (uint)nsize - osize); |
| 984263bc MD |
303 | *bpp = bp; |
| 304 | return (0); | |
| 305 | } | |
| 306 | #ifdef QUOTA | |
| 307 | /* | |
| 308 | * Restore user's disk quota because allocation failed. | |
| 309 | */ | |
| 50e58362 | 310 | (void) ufs_chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE); |
| 984263bc MD |
311 | #endif |
| 312 | brelse(bp); | |
| 313 | nospace: | |
| 314 | /* | |
| 315 | * no space available | |
| 316 | */ | |
| f719c866 DR |
317 | ffs_fserr(fs, cred->cr_uid, "filesystem full"); |
| 318 | uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt); | |
| 984263bc MD |
319 | return (ENOSPC); |
| 320 | } | |
| 321 | ||
| 322 | SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem"); | |
| 323 | ||
| 324 | /* | |
| 325 | * Reallocate a sequence of blocks into a contiguous sequence of blocks. | |
| 326 | * | |
| 327 | * The vnode and an array of buffer pointers for a range of sequential | |
| 328 | * logical blocks to be made contiguous is given. The allocator attempts | |
| 329 | * to find a range of sequential blocks starting as close as possible to | |
| 330 | * an fs_rotdelay offset from the end of the allocation for the logical | |
| 331 | * block immediately preceeding the current range. If successful, the | |
| 332 | * physical block numbers in the buffer pointers and in the inode are | |
| 333 | * changed to reflect the new allocation. If unsuccessful, the allocation | |
| 334 | * is left unchanged. The success in doing the reallocation is returned. | |
| 335 | * Note that the error return is not reflected back to the user. Rather | |
| 336 | * the previous block allocation will be used. | |
| 337 | */ | |
| 338 | static int doasyncfree = 1; | |
| 339 | SYSCTL_INT(_vfs_ffs, FFS_ASYNCFREE, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); | |
| 340 | ||
| 341 | static int doreallocblks = 1; | |
| 342 | SYSCTL_INT(_vfs_ffs, FFS_REALLOCBLKS, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); | |
| 343 | ||
| 344 | #ifdef DEBUG | |
| 345 | static volatile int prtrealloc = 0; | |
| 346 | #endif | |
| 347 | ||
| 0973c589 CP |
348 | /* |
| 349 | * ffs_reallocblks(struct vnode *a_vp, struct cluster_save *a_buflist) | |
| 350 | */ | |
| 984263bc | 351 | int |
| 0973c589 | 352 | ffs_reallocblks(struct vop_reallocblks_args *ap) |
| 984263bc MD |
353 | { |
| 354 | struct fs *fs; | |
| 355 | struct inode *ip; | |
| 356 | struct vnode *vp; | |
| 357 | struct buf *sbp, *ebp; | |
| 358 | ufs_daddr_t *bap, *sbap, *ebap = 0; | |
| 359 | struct cluster_save *buflist; | |
| 360 | ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno; | |
| 54078292 MD |
361 | #ifdef DIAGNOSTIC |
| 362 | off_t boffset; | |
| 363 | #endif | |
| 984263bc | 364 | struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; |
| d490cb23 | 365 | int i, len, slen, start_lvl, end_lvl, pref, ssize; |
| 984263bc MD |
366 | |
| 367 | if (doreallocblks == 0) | |
| 368 | return (ENOSPC); | |
| 369 | vp = ap->a_vp; | |
| 370 | ip = VTOI(vp); | |
| 371 | fs = ip->i_fs; | |
| 372 | if (fs->fs_contigsumsize <= 0) | |
| 373 | return (ENOSPC); | |
| 374 | buflist = ap->a_buflist; | |
| 375 | len = buflist->bs_nchildren; | |
| 54078292 | 376 | start_lbn = lblkno(fs, buflist->bs_children[0]->b_loffset); |
| 984263bc MD |
377 | end_lbn = start_lbn + len - 1; |
| 378 | #ifdef DIAGNOSTIC | |
| 379 | for (i = 0; i < len; i++) | |
| 380 | if (!ffs_checkblk(ip, | |
| 54078292 | 381 | dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset), fs->fs_bsize)) |
| 984263bc | 382 | panic("ffs_reallocblks: unallocated block 1"); |
| 54078292 MD |
383 | for (i = 1; i < len; i++) { |
| 384 | if (buflist->bs_children[i]->b_loffset != lblktodoff(fs, start_lbn) + lblktodoff(fs, i)) | |
| 984263bc | 385 | panic("ffs_reallocblks: non-logical cluster"); |
| 54078292 MD |
386 | } |
| 387 | boffset = buflist->bs_children[0]->b_bio2.bio_offset; | |
| 388 | ssize = (int)fsbtodoff(fs, fs->fs_frag); | |
| 984263bc | 389 | for (i = 1; i < len - 1; i++) |
| 54078292 | 390 | if (buflist->bs_children[i]->b_bio2.bio_offset != boffset + (i * ssize)) |
| 984263bc MD |
391 | panic("ffs_reallocblks: non-physical cluster %d", i); |
| 392 | #endif | |
| 393 | /* | |
| 394 | * If the latest allocation is in a new cylinder group, assume that | |
| 395 | * the filesystem has decided to move and do not force it back to | |
| 396 | * the previous cylinder group. | |
| 397 | */ | |
| 54078292 MD |
398 | if (dtog(fs, dofftofsb(fs, buflist->bs_children[0]->b_bio2.bio_offset)) != |
| 399 | dtog(fs, dofftofsb(fs, buflist->bs_children[len - 1]->b_bio2.bio_offset))) | |
| 984263bc MD |
400 | return (ENOSPC); |
| 401 | if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || | |
| 402 | ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) | |
| 403 | return (ENOSPC); | |
| 404 | /* | |
| d490cb23 MD |
405 | * Get the starting offset and block map for the first block and |
| 406 | * the number of blocks that will fit into sbap starting at soff. | |
| 984263bc MD |
407 | */ |
| 408 | if (start_lvl == 0) { | |
| 409 | sbap = &ip->i_db[0]; | |
| 410 | soff = start_lbn; | |
| d490cb23 | 411 | slen = NDADDR - soff; |
| 984263bc MD |
412 | } else { |
| 413 | idp = &start_ap[start_lvl - 1]; | |
| 54078292 | 414 | if (bread(vp, lblktodoff(fs, idp->in_lbn), (int)fs->fs_bsize, &sbp)) { |
| 984263bc MD |
415 | brelse(sbp); |
| 416 | return (ENOSPC); | |
| 417 | } | |
| 418 | sbap = (ufs_daddr_t *)sbp->b_data; | |
| 419 | soff = idp->in_off; | |
| d490cb23 | 420 | slen = fs->fs_nindir - soff; |
| 984263bc MD |
421 | } |
| 422 | /* | |
| 423 | * Find the preferred location for the cluster. | |
| 424 | */ | |
| 425 | pref = ffs_blkpref(ip, start_lbn, soff, sbap); | |
| 1c9602b3 | 426 | |
| 984263bc MD |
427 | /* |
| 428 | * If the block range spans two block maps, get the second map. | |
| 429 | */ | |
| 430 | if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { | |
| 431 | ssize = len; | |
| 432 | } else { | |
| 433 | #ifdef DIAGNOSTIC | |
| 434 | if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) | |
| 435 | panic("ffs_reallocblk: start == end"); | |
| 436 | #endif | |
| 437 | ssize = len - (idp->in_off + 1); | |
| 54078292 | 438 | if (bread(vp, lblktodoff(fs, idp->in_lbn), (int)fs->fs_bsize, &ebp)) |
| 984263bc MD |
439 | goto fail; |
| 440 | ebap = (ufs_daddr_t *)ebp->b_data; | |
| 441 | } | |
| d490cb23 MD |
442 | |
| 443 | /* | |
| 444 | * Make sure we aren't spanning more then two blockmaps. ssize is | |
| 445 | * our calculation of the span we have to scan in the first blockmap, | |
| 446 | * while slen is our calculation of the number of entries available | |
| 447 | * in the first blockmap (from soff). | |
| 448 | */ | |
| 449 | if (ssize > slen) { | |
| 450 | panic("ffs_reallocblks: range spans more then two blockmaps!" | |
| 451 | " start_lbn %ld len %d (%d/%d)", | |
| 452 | (long)start_lbn, len, slen, ssize); | |
| 453 | } | |
| 984263bc MD |
454 | /* |
| 455 | * Search the block map looking for an allocation of the desired size. | |
| 456 | */ | |
| 457 | if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref, | |
| 458 | len, ffs_clusteralloc)) == 0) | |
| 459 | goto fail; | |
| 460 | /* | |
| 461 | * We have found a new contiguous block. | |
| 462 | * | |
| 463 | * First we have to replace the old block pointers with the new | |
| 464 | * block pointers in the inode and indirect blocks associated | |
| 465 | * with the file. | |
| 466 | */ | |
| 467 | #ifdef DEBUG | |
| 468 | if (prtrealloc) | |
| 72b70bdb SW |
469 | kprintf("realloc: ino %ju, lbns %d-%d\n\told:", |
| 470 | (uintmax_t)ip->i_number, start_lbn, end_lbn); | |
| 984263bc MD |
471 | #endif |
| 472 | blkno = newblk; | |
| 473 | for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { | |
| 474 | if (i == ssize) { | |
| 475 | bap = ebap; | |
| 476 | soff = -i; | |
| 477 | } | |
| 478 | #ifdef DIAGNOSTIC | |
| 479 | if (!ffs_checkblk(ip, | |
| 54078292 | 480 | dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset), fs->fs_bsize)) |
| 984263bc | 481 | panic("ffs_reallocblks: unallocated block 2"); |
| 54078292 | 482 | if (dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset) != *bap) |
| 984263bc MD |
483 | panic("ffs_reallocblks: alloc mismatch"); |
| 484 | #endif | |
| 485 | #ifdef DEBUG | |
| 486 | if (prtrealloc) | |
| 086c1d7e | 487 | kprintf(" %d,", *bap); |
| 984263bc MD |
488 | #endif |
| 489 | if (DOINGSOFTDEP(vp)) { | |
| 490 | if (sbap == &ip->i_db[0] && i < ssize) | |
| 491 | softdep_setup_allocdirect(ip, start_lbn + i, | |
| 492 | blkno, *bap, fs->fs_bsize, fs->fs_bsize, | |
| 493 | buflist->bs_children[i]); | |
| 494 | else | |
| 495 | softdep_setup_allocindir_page(ip, start_lbn + i, | |
| 496 | i < ssize ? sbp : ebp, soff + i, blkno, | |
| 497 | *bap, buflist->bs_children[i]); | |
| 498 | } | |
| 499 | *bap++ = blkno; | |
| 500 | } | |
| 501 | /* | |
| 502 | * Next we must write out the modified inode and indirect blocks. | |
| 503 | * For strict correctness, the writes should be synchronous since | |
| 504 | * the old block values may have been written to disk. In practise | |
| 505 | * they are almost never written, but if we are concerned about | |
| 506 | * strict correctness, the `doasyncfree' flag should be set to zero. | |
| 507 | * | |
| 508 | * The test on `doasyncfree' should be changed to test a flag | |
| 509 | * that shows whether the associated buffers and inodes have | |
| 510 | * been written. The flag should be set when the cluster is | |
| 511 | * started and cleared whenever the buffer or inode is flushed. | |
| 512 | * We can then check below to see if it is set, and do the | |
| 513 | * synchronous write only when it has been cleared. | |
| 514 | */ | |
| 515 | if (sbap != &ip->i_db[0]) { | |
| 516 | if (doasyncfree) | |
| 517 | bdwrite(sbp); | |
| 518 | else | |
| 519 | bwrite(sbp); | |
| 520 | } else { | |
| 521 | ip->i_flag |= IN_CHANGE | IN_UPDATE; | |
| 522 | if (!doasyncfree) | |
| ac690a1d | 523 | ffs_update(vp, 1); |
| 984263bc MD |
524 | } |
| 525 | if (ssize < len) { | |
| 526 | if (doasyncfree) | |
| 527 | bdwrite(ebp); | |
| 528 | else | |
| 529 | bwrite(ebp); | |
| 530 | } | |
| 531 | /* | |
| 532 | * Last, free the old blocks and assign the new blocks to the buffers. | |
| 533 | */ | |
| 534 | #ifdef DEBUG | |
| 535 | if (prtrealloc) | |
| 086c1d7e | 536 | kprintf("\n\tnew:"); |
| 984263bc MD |
537 | #endif |
| 538 | for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { | |
| 539 | if (!DOINGSOFTDEP(vp)) | |
| 540 | ffs_blkfree(ip, | |
| 54078292 | 541 | dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset), |
| 984263bc | 542 | fs->fs_bsize); |
| 54078292 | 543 | buflist->bs_children[i]->b_bio2.bio_offset = fsbtodoff(fs, blkno); |
| 984263bc MD |
544 | #ifdef DIAGNOSTIC |
| 545 | if (!ffs_checkblk(ip, | |
| 54078292 | 546 | dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset), fs->fs_bsize)) |
| 984263bc MD |
547 | panic("ffs_reallocblks: unallocated block 3"); |
| 548 | #endif | |
| 549 | #ifdef DEBUG | |
| 550 | if (prtrealloc) | |
| 086c1d7e | 551 | kprintf(" %d,", blkno); |
| 984263bc MD |
552 | #endif |
| 553 | } | |
| 554 | #ifdef DEBUG | |
| 555 | if (prtrealloc) { | |
| 556 | prtrealloc--; | |
| 086c1d7e | 557 | kprintf("\n"); |
| 984263bc MD |
558 | } |
| 559 | #endif | |
| 560 | return (0); | |
| 561 | ||
| 562 | fail: | |
| 563 | if (ssize < len) | |
| 564 | brelse(ebp); | |
| 565 | if (sbap != &ip->i_db[0]) | |
| 566 | brelse(sbp); | |
| 567 | return (ENOSPC); | |
| 568 | } | |
| 569 | ||
| 570 | /* | |
| f719c866 | 571 | * Allocate an inode in the filesystem. |
| 984263bc MD |
572 | * |
| 573 | * If allocating a directory, use ffs_dirpref to select the inode. | |
| 574 | * If allocating in a directory, the following hierarchy is followed: | |
| 575 | * 1) allocate the preferred inode. | |
| 576 | * 2) allocate an inode in the same cylinder group. | |
| 577 | * 3) quadradically rehash into other cylinder groups, until an | |
| 578 | * available inode is located. | |
| 579 | * If no inode preference is given the following heirarchy is used | |
| 580 | * to allocate an inode: | |
| 581 | * 1) allocate an inode in cylinder group 0. | |
| 582 | * 2) quadradically rehash into other cylinder groups, until an | |
| 583 | * available inode is located. | |
| 584 | */ | |
| 585 | int | |
| 0973c589 | 586 | ffs_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) |
| 984263bc | 587 | { |
| 3ff2135f RG |
588 | struct inode *pip; |
| 589 | struct fs *fs; | |
| 590 | struct inode *ip; | |
| 984263bc MD |
591 | ino_t ino, ipref; |
| 592 | int cg, error; | |
| 593 | ||
| 594 | *vpp = NULL; | |
| 595 | pip = VTOI(pvp); | |
| 596 | fs = pip->i_fs; | |
| 597 | if (fs->fs_cstotal.cs_nifree == 0) | |
| 598 | goto noinodes; | |
| 599 | ||
| 600 | if ((mode & IFMT) == IFDIR) | |
| 601 | ipref = ffs_dirpref(pip); | |
| 602 | else | |
| 603 | ipref = pip->i_number; | |
| 604 | if (ipref >= fs->fs_ncg * fs->fs_ipg) | |
| 605 | ipref = 0; | |
| 606 | cg = ino_to_cg(fs, ipref); | |
| 607 | /* | |
| 608 | * Track number of dirs created one after another | |
| 609 | * in a same cg without intervening by files. | |
| 610 | */ | |
| 611 | if ((mode & IFMT) == IFDIR) { | |
| 612 | if (fs->fs_contigdirs[cg] < 255) | |
| 613 | fs->fs_contigdirs[cg]++; | |
| 614 | } else { | |
| 615 | if (fs->fs_contigdirs[cg] > 0) | |
| 616 | fs->fs_contigdirs[cg]--; | |
| 617 | } | |
| 618 | ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, | |
| 619 | (allocfcn_t *)ffs_nodealloccg); | |
| 620 | if (ino == 0) | |
| 621 | goto noinodes; | |
| b9b0a6d0 | 622 | error = VFS_VGET(pvp->v_mount, NULL, ino, vpp); |
| 984263bc | 623 | if (error) { |
| ac690a1d | 624 | ffs_vfree(pvp, ino, mode); |
| 984263bc MD |
625 | return (error); |
| 626 | } | |
| 627 | ip = VTOI(*vpp); | |
| 628 | if (ip->i_mode) { | |
| 086c1d7e | 629 | kprintf("mode = 0%o, inum = %lu, fs = %s\n", |
| 984263bc MD |
630 | ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt); |
| 631 | panic("ffs_valloc: dup alloc"); | |
| 632 | } | |
| 633 | if (ip->i_blocks) { /* XXX */ | |
| 086c1d7e | 634 | kprintf("free inode %s/%lu had %ld blocks\n", |
| 984263bc MD |
635 | fs->fs_fsmnt, (u_long)ino, (long)ip->i_blocks); |
| 636 | ip->i_blocks = 0; | |
| 637 | } | |
| 638 | ip->i_flags = 0; | |
| 639 | /* | |
| 640 | * Set up a new generation number for this inode. | |
| 641 | */ | |
| 642 | if (ip->i_gen == 0 || ++ip->i_gen == 0) | |
| cddfb7bb | 643 | ip->i_gen = krandom() / 2 + 1; |
| 984263bc MD |
644 | return (0); |
| 645 | noinodes: | |
| 646 | ffs_fserr(fs, cred->cr_uid, "out of inodes"); | |
| 647 | uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); | |
| 648 | return (ENOSPC); | |
| 649 | } | |
| 650 | ||
| 651 | /* | |
| 652 | * Find a cylinder group to place a directory. | |
| 653 | * | |
| 654 | * The policy implemented by this algorithm is to allocate a | |
| 655 | * directory inode in the same cylinder group as its parent | |
| 656 | * directory, but also to reserve space for its files inodes | |
| 657 | * and data. Restrict the number of directories which may be | |
| 658 | * allocated one after another in the same cylinder group | |
| 659 | * without intervening allocation of files. | |
| 660 | * | |
| 661 | * If we allocate a first level directory then force allocation | |
| 662 | * in another cylinder group. | |
| 663 | */ | |
| 664 | static ino_t | |
| 0973c589 | 665 | ffs_dirpref(struct inode *pip) |
| 984263bc | 666 | { |
| 3ff2135f | 667 | struct fs *fs; |
| 984263bc | 668 | int cg, prefcg, dirsize, cgsize; |
| 06a1dad1 | 669 | int64_t dirsize64; |
| 984263bc MD |
670 | int avgifree, avgbfree, avgndir, curdirsize; |
| 671 | int minifree, minbfree, maxndir; | |
| 672 | int mincg, minndir; | |
| 673 | int maxcontigdirs; | |
| 674 | ||
| 675 | fs = pip->i_fs; | |
| 676 | ||
| 677 | avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; | |
| 678 | avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; | |
| 679 | avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg; | |
| 680 | ||
| 681 | /* | |
| 682 | * Force allocation in another cg if creating a first level dir. | |
| 683 | */ | |
| 684 | if (ITOV(pip)->v_flag & VROOT) { | |
| 0ced1954 | 685 | prefcg = karc4random() % fs->fs_ncg; |
| 984263bc MD |
686 | mincg = prefcg; |
| 687 | minndir = fs->fs_ipg; | |
| 688 | for (cg = prefcg; cg < fs->fs_ncg; cg++) | |
| 689 | if (fs->fs_cs(fs, cg).cs_ndir < minndir && | |
| 690 | fs->fs_cs(fs, cg).cs_nifree >= avgifree && | |
| 691 | fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { | |
| 692 | mincg = cg; | |
| 693 | minndir = fs->fs_cs(fs, cg).cs_ndir; | |
| 694 | } | |
| 695 | for (cg = 0; cg < prefcg; cg++) | |
| 696 | if (fs->fs_cs(fs, cg).cs_ndir < minndir && | |
| 697 | fs->fs_cs(fs, cg).cs_nifree >= avgifree && | |
| 698 | fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { | |
| 699 | mincg = cg; | |
| 700 | minndir = fs->fs_cs(fs, cg).cs_ndir; | |
| 701 | } | |
| 702 | return ((ino_t)(fs->fs_ipg * mincg)); | |
| 703 | } | |
| 704 | ||
| 705 | /* | |
| 706 | * Count various limits which used for | |
| 707 | * optimal allocation of a directory inode. | |
| 708 | */ | |
| 709 | maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg); | |
| 04052b0e DR |
710 | minifree = avgifree - avgifree / 4; |
| 711 | if (minifree < 1) | |
| 712 | minifree = 1; | |
| 713 | minbfree = avgbfree - avgbfree / 4; | |
| 714 | if (minbfree < 1) | |
| 715 | minbfree = 1; | |
| 984263bc | 716 | cgsize = fs->fs_fsize * fs->fs_fpg; |
| 06a1dad1 MD |
717 | |
| 718 | /* | |
| 719 | * fs_avgfilesize and fs_avgfpdir are user-settable entities and | |
| 720 | * multiplying them may overflow a 32 bit integer. | |
| 721 | */ | |
| 722 | dirsize64 = fs->fs_avgfilesize * (int64_t)fs->fs_avgfpdir; | |
| 723 | if (dirsize64 > 0x7fffffff) { | |
| 984263bc | 724 | maxcontigdirs = 1; |
| 06a1dad1 MD |
725 | } else { |
| 726 | dirsize = (int)dirsize64; | |
| 727 | curdirsize = avgndir ? | |
| 728 | (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0; | |
| 729 | if (dirsize < curdirsize) | |
| 730 | dirsize = curdirsize; | |
| 731 | maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255); | |
| 732 | if (fs->fs_avgfpdir > 0) | |
| 733 | maxcontigdirs = min(maxcontigdirs, | |
| 734 | fs->fs_ipg / fs->fs_avgfpdir); | |
| 735 | if (maxcontigdirs == 0) | |
| 736 | maxcontigdirs = 1; | |
| 737 | } | |
| 984263bc MD |
738 | |
| 739 | /* | |
| 740 | * Limit number of dirs in one cg and reserve space for | |
| 741 | * regular files, but only if we have no deficit in | |
| 742 | * inodes or space. | |
| 743 | */ | |
| 744 | prefcg = ino_to_cg(fs, pip->i_number); | |
| 745 | for (cg = prefcg; cg < fs->fs_ncg; cg++) | |
| 746 | if (fs->fs_cs(fs, cg).cs_ndir < maxndir && | |
| 747 | fs->fs_cs(fs, cg).cs_nifree >= minifree && | |
| 748 | fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { | |
| 749 | if (fs->fs_contigdirs[cg] < maxcontigdirs) | |
| 750 | return ((ino_t)(fs->fs_ipg * cg)); | |
| 751 | } | |
| 752 | for (cg = 0; cg < prefcg; cg++) | |
| 753 | if (fs->fs_cs(fs, cg).cs_ndir < maxndir && | |
| 754 | fs->fs_cs(fs, cg).cs_nifree >= minifree && | |
| 755 | fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { | |
| 756 | if (fs->fs_contigdirs[cg] < maxcontigdirs) | |
| 757 | return ((ino_t)(fs->fs_ipg * cg)); | |
| 758 | } | |
| 759 | /* | |
| 760 | * This is a backstop when we have deficit in space. | |
| 761 | */ | |
| 762 | for (cg = prefcg; cg < fs->fs_ncg; cg++) | |
| 763 | if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) | |
| 764 | return ((ino_t)(fs->fs_ipg * cg)); | |
| 765 | for (cg = 0; cg < prefcg; cg++) | |
| 766 | if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) | |
| 767 | break; | |
| 768 | return ((ino_t)(fs->fs_ipg * cg)); | |
| 769 | } | |
| 770 | ||
| 771 | /* | |
| 772 | * Select the desired position for the next block in a file. The file is | |
| 773 | * logically divided into sections. The first section is composed of the | |
| 774 | * direct blocks. Each additional section contains fs_maxbpg blocks. | |
| 775 | * | |
| 776 | * If no blocks have been allocated in the first section, the policy is to | |
| 777 | * request a block in the same cylinder group as the inode that describes | |
| 778 | * the file. If no blocks have been allocated in any other section, the | |
| 779 | * policy is to place the section in a cylinder group with a greater than | |
| 780 | * average number of free blocks. An appropriate cylinder group is found | |
| 781 | * by using a rotor that sweeps the cylinder groups. When a new group of | |
| 782 | * blocks is needed, the sweep begins in the cylinder group following the | |
| 783 | * cylinder group from which the previous allocation was made. The sweep | |
| 784 | * continues until a cylinder group with greater than the average number | |
| 785 | * of free blocks is found. If the allocation is for the first block in an | |
| 786 | * indirect block, the information on the previous allocation is unavailable; | |
| 787 | * here a best guess is made based upon the logical block number being | |
| 788 | * allocated. | |
| 789 | * | |
| 790 | * If a section is already partially allocated, the policy is to | |
| 791 | * contiguously allocate fs_maxcontig blocks. The end of one of these | |
| 792 | * contiguous blocks and the beginning of the next is physically separated | |
| 793 | * so that the disk head will be in transit between them for at least | |
| 794 | * fs_rotdelay milliseconds. This is to allow time for the processor to | |
| 795 | * schedule another I/O transfer. | |
| 796 | */ | |
| 797 | ufs_daddr_t | |
| 0973c589 | 798 | ffs_blkpref(struct inode *ip, ufs_daddr_t lbn, int indx, ufs_daddr_t *bap) |
| 984263bc | 799 | { |
| 3ff2135f RG |
800 | struct fs *fs; |
| 801 | int cg; | |
| 984263bc MD |
802 | int avgbfree, startcg; |
| 803 | ufs_daddr_t nextblk; | |
| 804 | ||
| 805 | fs = ip->i_fs; | |
| 806 | if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { | |
| 807 | if (lbn < NDADDR + NINDIR(fs)) { | |
| 808 | cg = ino_to_cg(fs, ip->i_number); | |
| 809 | return (fs->fs_fpg * cg + fs->fs_frag); | |
| 810 | } | |
| 811 | /* | |
| 812 | * Find a cylinder with greater than average number of | |
| 813 | * unused data blocks. | |
| 814 | */ | |
| 815 | if (indx == 0 || bap[indx - 1] == 0) | |
| 816 | startcg = | |
| 817 | ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; | |
| 818 | else | |
| 819 | startcg = dtog(fs, bap[indx - 1]) + 1; | |
| 820 | startcg %= fs->fs_ncg; | |
| 821 | avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; | |
| 822 | for (cg = startcg; cg < fs->fs_ncg; cg++) | |
| 823 | if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { | |
| 824 | fs->fs_cgrotor = cg; | |
| 825 | return (fs->fs_fpg * cg + fs->fs_frag); | |
| 826 | } | |
| 827 | for (cg = 0; cg <= startcg; cg++) | |
| 828 | if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { | |
| 829 | fs->fs_cgrotor = cg; | |
| 830 | return (fs->fs_fpg * cg + fs->fs_frag); | |
| 831 | } | |
| 832 | return (0); | |
| 833 | } | |
| 834 | /* | |
| 835 | * One or more previous blocks have been laid out. If less | |
| 836 | * than fs_maxcontig previous blocks are contiguous, the | |
| 837 | * next block is requested contiguously, otherwise it is | |
| 838 | * requested rotationally delayed by fs_rotdelay milliseconds. | |
| 839 | */ | |
| 840 | nextblk = bap[indx - 1] + fs->fs_frag; | |
| 841 | if (fs->fs_rotdelay == 0 || indx < fs->fs_maxcontig || | |
| 842 | bap[indx - fs->fs_maxcontig] + | |
| 843 | blkstofrags(fs, fs->fs_maxcontig) != nextblk) | |
| 844 | return (nextblk); | |
| 845 | /* | |
| 846 | * Here we convert ms of delay to frags as: | |
| 847 | * (frags) = (ms) * (rev/sec) * (sect/rev) / | |
| 848 | * ((sect/frag) * (ms/sec)) | |
| 849 | * then round up to the next block. | |
| 850 | */ | |
| 851 | nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / | |
| 852 | (NSPF(fs) * 1000), fs->fs_frag); | |
| 853 | return (nextblk); | |
| 854 | } | |
| 855 | ||
| 856 | /* | |
| 857 | * Implement the cylinder overflow algorithm. | |
| 858 | * | |
| 859 | * The policy implemented by this algorithm is: | |
| 860 | * 1) allocate the block in its requested cylinder group. | |
| 861 | * 2) quadradically rehash on the cylinder group number. | |
| 862 | * 3) brute force search for a free block. | |
| 863 | */ | |
| 864 | /*VARARGS5*/ | |
| 865 | static u_long | |
| 0973c589 CP |
866 | ffs_hashalloc(struct inode *ip, int cg, long pref, |
| 867 | int size, /* size for data blocks, mode for inodes */ | |
| 868 | allocfcn_t *allocator) | |
| 984263bc | 869 | { |
| 3ff2135f | 870 | struct fs *fs; |
| 984263bc MD |
871 | long result; /* XXX why not same type as we return? */ |
| 872 | int i, icg = cg; | |
| 873 | ||
| 874 | fs = ip->i_fs; | |
| 875 | /* | |
| 876 | * 1: preferred cylinder group | |
| 877 | */ | |
| 878 | result = (*allocator)(ip, cg, pref, size); | |
| 879 | if (result) | |
| 880 | return (result); | |
| 881 | /* | |
| 882 | * 2: quadratic rehash | |
| 883 | */ | |
| 884 | for (i = 1; i < fs->fs_ncg; i *= 2) { | |
| 885 | cg += i; | |
| 886 | if (cg >= fs->fs_ncg) | |
| 887 | cg -= fs->fs_ncg; | |
| 888 | result = (*allocator)(ip, cg, 0, size); | |
| 889 | if (result) | |
| 890 | return (result); | |
| 891 | } | |
| 892 | /* | |
| 893 | * 3: brute force search | |
| 894 | * Note that we start at i == 2, since 0 was checked initially, | |
| 895 | * and 1 is always checked in the quadratic rehash. | |
| 896 | */ | |
| 897 | cg = (icg + 2) % fs->fs_ncg; | |
| 898 | for (i = 2; i < fs->fs_ncg; i++) { | |
| 899 | result = (*allocator)(ip, cg, 0, size); | |
| 900 | if (result) | |
| 901 | return (result); | |
| 902 | cg++; | |
| 903 | if (cg == fs->fs_ncg) | |
| 904 | cg = 0; | |
| 905 | } | |
| 906 | return (0); | |
| 907 | } | |
| 908 | ||
| 909 | /* | |
| 910 | * Determine whether a fragment can be extended. | |
| 911 | * | |
| 912 | * Check to see if the necessary fragments are available, and | |
| 913 | * if they are, allocate them. | |
| 914 | */ | |
| 915 | static ufs_daddr_t | |
| 0973c589 | 916 | ffs_fragextend(struct inode *ip, int cg, long bprev, int osize, int nsize) |
| 984263bc | 917 | { |
| 3ff2135f RG |
918 | struct fs *fs; |
| 919 | struct cg *cgp; | |
| 984263bc MD |
920 | struct buf *bp; |
| 921 | long bno; | |
| 922 | int frags, bbase; | |
| 923 | int i, error; | |
| f719c866 | 924 | uint8_t *blksfree; |
| 984263bc MD |
925 | |
| 926 | fs = ip->i_fs; | |
| 927 | if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) | |
| 928 | return (0); | |
| 929 | frags = numfrags(fs, nsize); | |
| 930 | bbase = fragnum(fs, bprev); | |
| 931 | if (bbase > fragnum(fs, (bprev + frags - 1))) { | |
| 932 | /* cannot extend across a block boundary */ | |
| 933 | return (0); | |
| 934 | } | |
| 5d5d3444 | 935 | KKASSERT(blknum(fs, bprev) == blknum(fs, bprev + frags - 1)); |
| 54078292 | 936 | error = bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)), |
| 3b568787 | 937 | (int)fs->fs_cgsize, &bp); |
| 984263bc MD |
938 | if (error) { |
| 939 | brelse(bp); | |
| 940 | return (0); | |
| 941 | } | |
| 942 | cgp = (struct cg *)bp->b_data; | |
| 943 | if (!cg_chkmagic(cgp)) { | |
| 944 | brelse(bp); | |
| 945 | return (0); | |
| 946 | } | |
| 984263bc MD |
947 | cgp->cg_time = time_second; |
| 948 | bno = dtogd(fs, bprev); | |
| 949 | blksfree = cg_blksfree(cgp); | |
| 5d5d3444 | 950 | for (i = numfrags(fs, osize); i < frags; i++) { |
| 984263bc MD |
951 | if (isclr(blksfree, bno + i)) { |
| 952 | brelse(bp); | |
| 953 | return (0); | |
| 954 | } | |
| 5d5d3444 MD |
955 | } |
| 956 | ||
| 984263bc MD |
957 | /* |
| 958 | * the current fragment can be extended | |
| 959 | * deduct the count on fragment being extended into | |
| 960 | * increase the count on the remaining fragment (if any) | |
| 961 | * allocate the extended piece | |
| 5d5d3444 MD |
962 | * |
| 963 | * ---oooooooooonnnnnnn111---- | |
| 964 | * [-----frags-----] | |
| 965 | * ^ ^ | |
| 966 | * bbase fs_frag | |
| 984263bc | 967 | */ |
| 5d5d3444 | 968 | for (i = frags; i < fs->fs_frag - bbase; i++) { |
| 984263bc MD |
969 | if (isclr(blksfree, bno + i)) |
| 970 | break; | |
| 5d5d3444 MD |
971 | } |
| 972 | ||
| 973 | /* | |
| 974 | * Size of original free frag is [i - numfrags(fs, osize)] | |
| 975 | * Size of remaining free frag is [i - frags] | |
| 976 | */ | |
| 984263bc MD |
977 | cgp->cg_frsum[i - numfrags(fs, osize)]--; |
| 978 | if (i != frags) | |
| 979 | cgp->cg_frsum[i - frags]++; | |
| 980 | for (i = numfrags(fs, osize); i < frags; i++) { | |
| 981 | clrbit(blksfree, bno + i); | |
| 982 | cgp->cg_cs.cs_nffree--; | |
| 983 | fs->fs_cstotal.cs_nffree--; | |
| 984 | fs->fs_cs(fs, cg).cs_nffree--; | |
| 985 | } | |
| 986 | fs->fs_fmod = 1; | |
| 987 | if (DOINGSOFTDEP(ITOV(ip))) | |
| 988 | softdep_setup_blkmapdep(bp, fs, bprev); | |
| 989 | bdwrite(bp); | |
| 990 | return (bprev); | |
| 991 | } | |
| 992 | ||
| 993 | /* | |
| 994 | * Determine whether a block can be allocated. | |
| 995 | * | |
| 996 | * Check to see if a block of the appropriate size is available, | |
| 997 | * and if it is, allocate it. | |
| 998 | */ | |
| 999 | static ufs_daddr_t | |
| 0973c589 | 1000 | ffs_alloccg(struct inode *ip, int cg, ufs_daddr_t bpref, int size) |
| 984263bc | 1001 | { |
| 3ff2135f RG |
1002 | struct fs *fs; |
| 1003 | struct cg *cgp; | |
| 984263bc | 1004 | struct buf *bp; |
| 3ff2135f | 1005 | int i; |
| 984263bc MD |
1006 | ufs_daddr_t bno, blkno; |
| 1007 | int allocsiz, error, frags; | |
| f719c866 | 1008 | uint8_t *blksfree; |
| 984263bc MD |
1009 | |
| 1010 | fs = ip->i_fs; | |
| 1011 | if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) | |
| 1012 | return (0); | |
| 54078292 | 1013 | error = bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)), |
| 3b568787 | 1014 | (int)fs->fs_cgsize, &bp); |
| 984263bc MD |
1015 | if (error) { |
| 1016 | brelse(bp); | |
| 1017 | return (0); | |
| 1018 | } | |
| 1019 | cgp = (struct cg *)bp->b_data; | |
| 1020 | if (!cg_chkmagic(cgp) || | |
| 1021 | (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { | |
| 1022 | brelse(bp); | |
| 1023 | return (0); | |
| 1024 | } | |
| 984263bc MD |
1025 | cgp->cg_time = time_second; |
| 1026 | if (size == fs->fs_bsize) { | |
| 1027 | bno = ffs_alloccgblk(ip, bp, bpref); | |
| 1028 | bdwrite(bp); | |
| 1029 | return (bno); | |
| 1030 | } | |
| 1031 | /* | |
| 5d5d3444 MD |
1032 | * Check to see if any fragments of sufficient size are already |
| 1033 | * available. Fit the data into a larger fragment if necessary, | |
| 1034 | * before allocating a whole new block. | |
| 984263bc MD |
1035 | */ |
| 1036 | blksfree = cg_blksfree(cgp); | |
| 1037 | frags = numfrags(fs, size); | |
| 5d5d3444 | 1038 | for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) { |
| 984263bc MD |
1039 | if (cgp->cg_frsum[allocsiz] != 0) |
| 1040 | break; | |
| 5d5d3444 | 1041 | } |
| 984263bc MD |
1042 | if (allocsiz == fs->fs_frag) { |
| 1043 | /* | |
| 5d5d3444 MD |
1044 | * No fragments were available, allocate a whole block and |
| 1045 | * cut the requested fragment (of size frags) out of it. | |
| 984263bc MD |
1046 | */ |
| 1047 | if (cgp->cg_cs.cs_nbfree == 0) { | |
| 1048 | brelse(bp); | |
| 1049 | return (0); | |
| 1050 | } | |
| 1051 | bno = ffs_alloccgblk(ip, bp, bpref); | |
| 1052 | bpref = dtogd(fs, bno); | |
| 1053 | for (i = frags; i < fs->fs_frag; i++) | |
| 1054 | setbit(blksfree, bpref + i); | |
| 5d5d3444 MD |
1055 | |
| 1056 | /* | |
| 1057 | * Calculate the number of free frags still remaining after | |
| 1058 | * we have cut out the requested allocation. Indicate that | |
| 1059 | * a fragment of that size is now available for future | |
| 1060 | * allocation. | |
| 1061 | */ | |
| 984263bc MD |
1062 | i = fs->fs_frag - frags; |
| 1063 | cgp->cg_cs.cs_nffree += i; | |
| 1064 | fs->fs_cstotal.cs_nffree += i; | |
| 1065 | fs->fs_cs(fs, cg).cs_nffree += i; | |
| 1066 | fs->fs_fmod = 1; | |
| 1067 | cgp->cg_frsum[i]++; | |
| 1068 | bdwrite(bp); | |
| 1069 | return (bno); | |
| 1070 | } | |
| 5d5d3444 MD |
1071 | |
| 1072 | /* | |
| 1073 | * cg_frsum[] has told us that a free fragment of allocsiz size is | |
| 1074 | * available. Find it, then clear the bitmap bits associated with | |
| 1075 | * the size we want. | |
| 1076 | */ | |
| 984263bc MD |
1077 | bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); |
| 1078 | if (bno < 0) { | |
| 1079 | brelse(bp); | |
| 1080 | return (0); | |
| 1081 | } | |
| 1082 | for (i = 0; i < frags; i++) | |
| 1083 | clrbit(blksfree, bno + i); | |
| 1084 | cgp->cg_cs.cs_nffree -= frags; | |
| 1085 | fs->fs_cstotal.cs_nffree -= frags; | |
| 1086 | fs->fs_cs(fs, cg).cs_nffree -= frags; | |
| 1087 | fs->fs_fmod = 1; | |
| 5d5d3444 MD |
1088 | |
| 1089 | /* | |
| 1090 | * Account for the allocation. The original searched size that we | |
| 1091 | * found is no longer available. If we cut out a smaller piece then | |
| 1092 | * a smaller fragment is now available. | |
| 1093 | */ | |
| 984263bc MD |
1094 | cgp->cg_frsum[allocsiz]--; |
| 1095 | if (frags != allocsiz) | |
| 1096 | cgp->cg_frsum[allocsiz - frags]++; | |
| 1097 | blkno = cg * fs->fs_fpg + bno; | |
| 1098 | if (DOINGSOFTDEP(ITOV(ip))) | |
| 1099 | softdep_setup_blkmapdep(bp, fs, blkno); | |
| 1100 | bdwrite(bp); | |
| 1101 | return ((u_long)blkno); | |
| 1102 | } | |
| 1103 | ||
| 1104 | /* | |
| 1105 | * Allocate a block in a cylinder group. | |
| 1106 | * | |
| 1107 | * This algorithm implements the following policy: | |
| 1108 | * 1) allocate the requested block. | |
| 1109 | * 2) allocate a rotationally optimal block in the same cylinder. | |
| 1110 | * 3) allocate the next available block on the block rotor for the | |
| 1111 | * specified cylinder group. | |
| 1112 | * Note that this routine only allocates fs_bsize blocks; these | |
| 1113 | * blocks may be fragmented by the routine that allocates them. | |
| 1114 | */ | |
| 1115 | static ufs_daddr_t | |
| 0973c589 | 1116 | ffs_alloccgblk(struct inode *ip, struct buf *bp, ufs_daddr_t bpref) |
| 984263bc MD |
1117 | { |
| 1118 | struct fs *fs; | |
| 1119 | struct cg *cgp; | |
| 1120 | ufs_daddr_t bno, blkno; | |
| 1121 | int cylno, pos, delta; | |
| 1122 | short *cylbp; | |
| 3ff2135f | 1123 | int i; |
| f719c866 | 1124 | uint8_t *blksfree; |
| 984263bc MD |
1125 | |
| 1126 | fs = ip->i_fs; | |
| 1127 | cgp = (struct cg *)bp->b_data; | |
| 1128 | blksfree = cg_blksfree(cgp); | |
| 1129 | if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { | |
| 1130 | bpref = cgp->cg_rotor; | |
| 1131 | goto norot; | |
| 1132 | } | |
| 1133 | bpref = blknum(fs, bpref); | |
| 1134 | bpref = dtogd(fs, bpref); | |
| 1135 | /* | |
| 1136 | * if the requested block is available, use it | |
| 1137 | */ | |
| 1138 | if (ffs_isblock(fs, blksfree, fragstoblks(fs, bpref))) { | |
| 1139 | bno = bpref; | |
| 1140 | goto gotit; | |
| 1141 | } | |
| 1142 | if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) { | |
| 1143 | /* | |
| 1144 | * Block layout information is not available. | |
| 1145 | * Leaving bpref unchanged means we take the | |
| 1146 | * next available free block following the one | |
| 1147 | * we just allocated. Hopefully this will at | |
| 1148 | * least hit a track cache on drives of unknown | |
| 1149 | * geometry (e.g. SCSI). | |
| 1150 | */ | |
| 1151 | goto norot; | |
| 1152 | } | |
| 1153 | /* | |
| 1154 | * check for a block available on the same cylinder | |
| 1155 | */ | |
| 1156 | cylno = cbtocylno(fs, bpref); | |
| 1157 | if (cg_blktot(cgp)[cylno] == 0) | |
| 1158 | goto norot; | |
| 1159 | /* | |
| 1160 | * check the summary information to see if a block is | |
| 1161 | * available in the requested cylinder starting at the | |
| 1162 | * requested rotational position and proceeding around. | |
| 1163 | */ | |
| 1164 | cylbp = cg_blks(fs, cgp, cylno); | |
| 1165 | pos = cbtorpos(fs, bpref); | |
| 1166 | for (i = pos; i < fs->fs_nrpos; i++) | |
| 1167 | if (cylbp[i] > 0) | |
| 1168 | break; | |
| 1169 | if (i == fs->fs_nrpos) | |
| 1170 | for (i = 0; i < pos; i++) | |
| 1171 | if (cylbp[i] > 0) | |
| 1172 | break; | |
| 1173 | if (cylbp[i] > 0) { | |
| 1174 | /* | |
| 1175 | * found a rotational position, now find the actual | |
| 1176 | * block. A panic if none is actually there. | |
| 1177 | */ | |
| 1178 | pos = cylno % fs->fs_cpc; | |
| 1179 | bno = (cylno - pos) * fs->fs_spc / NSPB(fs); | |
| 1180 | if (fs_postbl(fs, pos)[i] == -1) { | |
| 086c1d7e | 1181 | kprintf("pos = %d, i = %d, fs = %s\n", |
| 984263bc MD |
1182 | pos, i, fs->fs_fsmnt); |
| 1183 | panic("ffs_alloccgblk: cyl groups corrupted"); | |
| 1184 | } | |
| 1185 | for (i = fs_postbl(fs, pos)[i];; ) { | |
| 1186 | if (ffs_isblock(fs, blksfree, bno + i)) { | |
| 1187 | bno = blkstofrags(fs, (bno + i)); | |
| 1188 | goto gotit; | |
| 1189 | } | |
| 1190 | delta = fs_rotbl(fs)[i]; | |
| 1191 | if (delta <= 0 || | |
| 1192 | delta + i > fragstoblks(fs, fs->fs_fpg)) | |
| 1193 | break; | |
| 1194 | i += delta; | |
| 1195 | } | |
| 086c1d7e | 1196 | kprintf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); |
| 984263bc MD |
1197 | panic("ffs_alloccgblk: can't find blk in cyl"); |
| 1198 | } | |
| 1199 | norot: | |
| 1200 | /* | |
| 1201 | * no blocks in the requested cylinder, so take next | |
| 1202 | * available one in this cylinder group. | |
| 1203 | */ | |
| 1204 | bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); | |
| 1205 | if (bno < 0) | |
| 1206 | return (0); | |
| 1207 | cgp->cg_rotor = bno; | |
| 1208 | gotit: | |
| 1209 | blkno = fragstoblks(fs, bno); | |
| 1210 | ffs_clrblock(fs, blksfree, (long)blkno); | |
| 1211 | ffs_clusteracct(fs, cgp, blkno, -1); | |
| 1212 | cgp->cg_cs.cs_nbfree--; | |
| 1213 | fs->fs_cstotal.cs_nbfree--; | |
| 1214 | fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; | |
| 1215 | cylno = cbtocylno(fs, bno); | |
| 1216 | cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; | |
| 1217 | cg_blktot(cgp)[cylno]--; | |
| 1218 | fs->fs_fmod = 1; | |
| 1219 | blkno = cgp->cg_cgx * fs->fs_fpg + bno; | |
| 1220 | if (DOINGSOFTDEP(ITOV(ip))) | |
| 1221 | softdep_setup_blkmapdep(bp, fs, blkno); | |
| 1222 | return (blkno); | |
| 1223 | } | |
| 1224 | ||
| 1225 | /* | |
| 1226 | * Determine whether a cluster can be allocated. | |
| 1227 | * | |
| 1228 | * We do not currently check for optimal rotational layout if there | |
| 1229 | * are multiple choices in the same cylinder group. Instead we just | |
| 1230 | * take the first one that we find following bpref. | |
| 1231 | */ | |
| 1232 | static ufs_daddr_t | |
| 0973c589 | 1233 | ffs_clusteralloc(struct inode *ip, int cg, ufs_daddr_t bpref, int len) |
| 984263bc | 1234 | { |
| 3ff2135f RG |
1235 | struct fs *fs; |
| 1236 | struct cg *cgp; | |
| 984263bc MD |
1237 | struct buf *bp; |
| 1238 | int i, got, run, bno, bit, map; | |
| 1239 | u_char *mapp; | |
| 1240 | int32_t *lp; | |
| f719c866 | 1241 | uint8_t *blksfree; |
| 984263bc MD |
1242 | |
| 1243 | fs = ip->i_fs; | |
| 1244 | if (fs->fs_maxcluster[cg] < len) | |
| 1245 | return (0); | |
| 54078292 MD |
1246 | if (bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)), |
| 1247 | (int)fs->fs_cgsize, &bp)) { | |
| 984263bc | 1248 | goto fail; |
| 54078292 | 1249 | } |
| 984263bc MD |
1250 | cgp = (struct cg *)bp->b_data; |
| 1251 | if (!cg_chkmagic(cgp)) | |
| 1252 | goto fail; | |
| 70371608 | 1253 | |
| 984263bc MD |
1254 | /* |
| 1255 | * Check to see if a cluster of the needed size (or bigger) is | |
| 1256 | * available in this cylinder group. | |
| 1257 | */ | |
| 1258 | lp = &cg_clustersum(cgp)[len]; | |
| 1259 | for (i = len; i <= fs->fs_contigsumsize; i++) | |
| 1260 | if (*lp++ > 0) | |
| 1261 | break; | |
| 1262 | if (i > fs->fs_contigsumsize) { | |
| 1263 | /* | |
| 1264 | * This is the first time looking for a cluster in this | |
| 1265 | * cylinder group. Update the cluster summary information | |
| 1266 | * to reflect the true maximum sized cluster so that | |
| 1267 | * future cluster allocation requests can avoid reading | |
| 1268 | * the cylinder group map only to find no clusters. | |
| 1269 | */ | |
| 1270 | lp = &cg_clustersum(cgp)[len - 1]; | |
| 1271 | for (i = len - 1; i > 0; i--) | |
| 1272 | if (*lp-- > 0) | |
| 1273 | break; | |
| 1274 | fs->fs_maxcluster[cg] = i; | |
| 1275 | goto fail; | |
| 1276 | } | |
| 1277 | /* | |
| 1278 | * Search the cluster map to find a big enough cluster. | |
| 1279 | * We take the first one that we find, even if it is larger | |
| 1280 | * than we need as we prefer to get one close to the previous | |
| 1281 | * block allocation. We do not search before the current | |
| 1282 | * preference point as we do not want to allocate a block | |
| 1283 | * that is allocated before the previous one (as we will | |
| 1284 | * then have to wait for another pass of the elevator | |
| 1285 | * algorithm before it will be read). We prefer to fail and | |
| 1286 | * be recalled to try an allocation in the next cylinder group. | |
| 1287 | */ | |
| 1288 | if (dtog(fs, bpref) != cg) | |
| 1289 | bpref = 0; | |
| 1290 | else | |
| 1291 | bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); | |
| 1292 | mapp = &cg_clustersfree(cgp)[bpref / NBBY]; | |
| 1293 | map = *mapp++; | |
| 1294 | bit = 1 << (bpref % NBBY); | |
| 1295 | for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { | |
| 1296 | if ((map & bit) == 0) { | |
| 1297 | run = 0; | |
| 1298 | } else { | |
| 1299 | run++; | |
| 1300 | if (run == len) | |
| 1301 | break; | |
| 1302 | } | |
| 1303 | if ((got & (NBBY - 1)) != (NBBY - 1)) { | |
| 1304 | bit <<= 1; | |
| 1305 | } else { | |
| 1306 | map = *mapp++; | |
| 1307 | bit = 1; | |
| 1308 | } | |
| 1309 | } | |
| 1310 | if (got >= cgp->cg_nclusterblks) | |
| 1311 | goto fail; | |
| 1312 | /* | |
| 1313 | * Allocate the cluster that we have found. | |
| 1314 | */ | |
| 1315 | blksfree = cg_blksfree(cgp); | |
| 5d5d3444 | 1316 | for (i = 1; i <= len; i++) { |
| 984263bc MD |
1317 | if (!ffs_isblock(fs, blksfree, got - run + i)) |
| 1318 | panic("ffs_clusteralloc: map mismatch"); | |
| 5d5d3444 | 1319 | } |
| 984263bc MD |
1320 | bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); |
| 1321 | if (dtog(fs, bno) != cg) | |
| 1322 | panic("ffs_clusteralloc: allocated out of group"); | |
| 1323 | len = blkstofrags(fs, len); | |
| 5d5d3444 | 1324 | for (i = 0; i < len; i += fs->fs_frag) { |
| 984263bc MD |
1325 | if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i) |
| 1326 | panic("ffs_clusteralloc: lost block"); | |
| 5d5d3444 | 1327 | } |
| 984263bc MD |
1328 | bdwrite(bp); |
| 1329 | return (bno); | |
| 1330 | ||
| 1331 | fail: | |
| 1332 | brelse(bp); | |
| 1333 | return (0); | |
| 1334 | } | |
| 1335 | ||
| 1336 | /* | |
| 1337 | * Determine whether an inode can be allocated. | |
| 1338 | * | |
| 1339 | * Check to see if an inode is available, and if it is, | |
| 1340 | * allocate it using the following policy: | |
| 1341 | * 1) allocate the requested inode. | |
| 1342 | * 2) allocate the next available inode after the requested | |
| 1343 | * inode in the specified cylinder group. | |
| b1f7dd27 MD |
1344 | * 3) the inode must not already be in the inode hash table. We |
| 1345 | * can encounter such a case because the vnode reclamation sequence | |
| 1346 | * frees the bit | |
| 1347 | * 3) the inode must not already be in the inode hash, otherwise it | |
| 1348 | * may be in the process of being deallocated. This can occur | |
| 1349 | * because the bitmap is updated before the inode is removed from | |
| 1350 | * hash. If we were to reallocate the inode the caller could wind | |
| 1351 | * up returning a vnode/inode combination which is in an indeterminate | |
| 1352 | * state. | |
| 984263bc MD |
1353 | */ |
| 1354 | static ino_t | |
| 0973c589 | 1355 | ffs_nodealloccg(struct inode *ip, int cg, ufs_daddr_t ipref, int mode) |
| 984263bc | 1356 | { |
| 3ff2135f RG |
1357 | struct fs *fs; |
| 1358 | struct cg *cgp; | |
| 984263bc | 1359 | struct buf *bp; |
| f719c866 | 1360 | uint8_t *inosused; |
| c3274b0a MD |
1361 | uint8_t map; |
| 1362 | int error, len, arraysize, i; | |
| b1f7dd27 MD |
1363 | int icheckmiss; |
| 1364 | ufs_daddr_t ibase; | |
| 984263bc MD |
1365 | |
| 1366 | fs = ip->i_fs; | |
| 1367 | if (fs->fs_cs(fs, cg).cs_nifree == 0) | |
| 1368 | return (0); | |
| 54078292 MD |
1369 | error = bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)), |
| 1370 | (int)fs->fs_cgsize, &bp); | |
| 984263bc MD |
1371 | if (error) { |
| 1372 | brelse(bp); | |
| 1373 | return (0); | |
| 1374 | } | |
| 1375 | cgp = (struct cg *)bp->b_data; | |
| 1376 | if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { | |
| 1377 | brelse(bp); | |
| 1378 | return (0); | |
| 1379 | } | |
| 984263bc | 1380 | inosused = cg_inosused(cgp); |
| b1f7dd27 MD |
1381 | icheckmiss = 0; |
| 1382 | ||
| 1383 | /* | |
| 1384 | * Quick check, reuse the most recently free inode or continue | |
| 1385 | * a scan from where we left off the last time. | |
| 1386 | */ | |
| 1387 | ibase = cg * fs->fs_ipg; | |
| 984263bc MD |
1388 | if (ipref) { |
| 1389 | ipref %= fs->fs_ipg; | |
| b1f7dd27 MD |
1390 | if (isclr(inosused, ipref)) { |
| 1391 | if (ufs_ihashcheck(ip->i_dev, ibase + ipref) == 0) | |
| 1392 | goto gotit; | |
| 984263bc MD |
1393 | } |
| 1394 | } | |
| b1f7dd27 MD |
1395 | |
| 1396 | /* | |
| 1397 | * Scan the inode bitmap starting at irotor, be sure to handle | |
| 1398 | * the edge case by going back to the beginning of the array. | |
| c3274b0a MD |
1399 | * |
| 1400 | * If the number of inodes is not byte-aligned, the unused bits | |
| 1401 | * should be set to 1. This will be sanity checked in gotit. Note | |
| 1402 | * that we have to be sure not to overlap the beginning and end | |
| 1403 | * when irotor is in the middle of a byte as this will cause the | |
| 1404 | * same bitmap byte to be checked twice. To solve this problem we | |
| 1405 | * just convert everything to a byte index for the loop. | |
| b1f7dd27 | 1406 | */ |
| c3274b0a MD |
1407 | ipref = (cgp->cg_irotor % fs->fs_ipg) >> 3; /* byte index */ |
| 1408 | len = (fs->fs_ipg + 7) >> 3; /* byte size */ | |
| 1409 | arraysize = len; | |
| b1f7dd27 MD |
1410 | |
| 1411 | while (len > 0) { | |
| c3274b0a MD |
1412 | map = inosused[ipref]; |
| 1413 | if (map != 255) { | |
| b1f7dd27 MD |
1414 | for (i = 0; i < NBBY; ++i) { |
| 1415 | /* | |
| 1416 | * If we find a free bit we have to make sure | |
| 1417 | * that the inode is not in the middle of | |
| 1418 | * being destroyed. The inode should not exist | |
| 1419 | * in the inode hash. | |
| 1420 | * | |
| 1421 | * Adjust the rotor to try to hit the | |
| 1422 | * quick-check up above. | |
| 1423 | */ | |
| 1424 | if ((map & (1 << i)) == 0) { | |
| c3274b0a MD |
1425 | if (ufs_ihashcheck(ip->i_dev, ibase + (ipref << 3) + i) == 0) { |
| 1426 | ipref = (ipref << 3) + i; | |
| b1f7dd27 MD |
1427 | cgp->cg_irotor = (ipref + 1) % fs->fs_ipg; |
| 1428 | goto gotit; | |
| 1429 | } | |
| 1430 | ++icheckmiss; | |
| 1431 | } | |
| 1432 | } | |
| 1433 | } | |
| 1434 | ||
| 1435 | /* | |
| c3274b0a MD |
1436 | * Setup for the next byte, start at the beginning again if |
| 1437 | * we hit the end of the array. | |
| b1f7dd27 | 1438 | */ |
| c3274b0a | 1439 | if (++ipref == arraysize) |
| b1f7dd27 | 1440 | ipref = 0; |
| c3274b0a | 1441 | --len; |
| b1f7dd27 MD |
1442 | } |
| 1443 | if (icheckmiss == cgp->cg_cs.cs_nifree) { | |
| 1444 | brelse(bp); | |
| 1445 | return(0); | |
| 984263bc | 1446 | } |
| 086c1d7e | 1447 | kprintf("fs = %s\n", fs->fs_fsmnt); |
| b1f7dd27 MD |
1448 | panic("ffs_nodealloccg: block not in map, icheckmiss/nfree %d/%d", |
| 1449 | icheckmiss, cgp->cg_cs.cs_nifree); | |
| 984263bc | 1450 | /* NOTREACHED */ |
| c3274b0a MD |
1451 | |
| 1452 | /* | |
| 1453 | * ipref is a bit index as of the gotit label. | |
| 1454 | */ | |
| 984263bc | 1455 | gotit: |
| c3274b0a | 1456 | KKASSERT(ipref >= 0 && ipref < fs->fs_ipg); |
| b1f7dd27 | 1457 | cgp->cg_time = time_second; |
| 984263bc | 1458 | if (DOINGSOFTDEP(ITOV(ip))) |
| b1f7dd27 | 1459 | softdep_setup_inomapdep(bp, ip, ibase + ipref); |
| 984263bc MD |
1460 | setbit(inosused, ipref); |
| 1461 | cgp->cg_cs.cs_nifree--; | |
| 1462 | fs->fs_cstotal.cs_nifree--; | |
| 1463 | fs->fs_cs(fs, cg).cs_nifree--; | |
| 1464 | fs->fs_fmod = 1; | |
| 1465 | if ((mode & IFMT) == IFDIR) { | |
| 1466 | cgp->cg_cs.cs_ndir++; | |
| 1467 | fs->fs_cstotal.cs_ndir++; | |
| 1468 | fs->fs_cs(fs, cg).cs_ndir++; | |
| 1469 | } | |
| 1470 | bdwrite(bp); | |
| b1f7dd27 | 1471 | return (ibase + ipref); |
| 984263bc MD |
1472 | } |
| 1473 | ||
| 1474 | /* | |
| 1475 | * Free a block or fragment. | |
| 1476 | * | |
| 1477 | * The specified block or fragment is placed back in the | |
| 1478 | * free map. If a fragment is deallocated, a possible | |
| 1479 | * block reassembly is checked. | |
| 1480 | */ | |
| 1481 | void | |
| e0fb398b T |
1482 | ffs_blkfree_cg(struct fs * fs, struct vnode * i_devvp, cdev_t i_dev, ino_t i_number, |
| 1483 | uint32_t i_din_uid, ufs_daddr_t bno, long size) | |
| 984263bc | 1484 | { |
| 3ff2135f | 1485 | struct cg *cgp; |
| 984263bc MD |
1486 | struct buf *bp; |
| 1487 | ufs_daddr_t blkno; | |
| 1488 | int i, error, cg, blk, frags, bbase; | |
| f719c866 | 1489 | uint8_t *blksfree; |
| 984263bc | 1490 | |
| e0fb398b | 1491 | VOP_FREEBLKS(i_devvp, fsbtodoff(fs, bno), size); |
| f719c866 | 1492 | if ((uint)size > fs->fs_bsize || fragoff(fs, size) != 0 || |
| 984263bc | 1493 | fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { |
| 086c1d7e | 1494 | kprintf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n", |
| e0fb398b | 1495 | devtoname(i_dev), (long)bno, (long)fs->fs_bsize, size, |
| 984263bc MD |
1496 | fs->fs_fsmnt); |
| 1497 | panic("ffs_blkfree: bad size"); | |
| 1498 | } | |
| 1499 | cg = dtog(fs, bno); | |
| f719c866 | 1500 | if ((uint)bno >= fs->fs_size) { |
| 086c1d7e | 1501 | kprintf("bad block %ld, ino %lu\n", |
| e0fb398b T |
1502 | (long)bno, (u_long)i_number); |
| 1503 | ffs_fserr(fs, i_din_uid, "bad block"); | |
| 984263bc MD |
1504 | return; |
| 1505 | } | |
| 5d5d3444 MD |
1506 | |
| 1507 | /* | |
| 1508 | * Load the cylinder group | |
| 1509 | */ | |
| e0fb398b | 1510 | error = bread(i_devvp, fsbtodoff(fs, cgtod(fs, cg)), |
| 54078292 | 1511 | (int)fs->fs_cgsize, &bp); |
| 984263bc MD |
1512 | if (error) { |
| 1513 | brelse(bp); | |
| 1514 | return; | |
| 1515 | } | |
| 1516 | cgp = (struct cg *)bp->b_data; | |
| 1517 | if (!cg_chkmagic(cgp)) { | |
| 1518 | brelse(bp); | |
| 1519 | return; | |
| 1520 | } | |
| 984263bc MD |
1521 | cgp->cg_time = time_second; |
| 1522 | bno = dtogd(fs, bno); | |
| 1523 | blksfree = cg_blksfree(cgp); | |
| 5d5d3444 | 1524 | |
| 984263bc | 1525 | if (size == fs->fs_bsize) { |
| 5d5d3444 MD |
1526 | /* |
| 1527 | * Free a whole block | |
| 1528 | */ | |
| 984263bc MD |
1529 | blkno = fragstoblks(fs, bno); |
| 1530 | if (!ffs_isfreeblock(fs, blksfree, blkno)) { | |
| 086c1d7e | 1531 | kprintf("dev = %s, block = %ld, fs = %s\n", |
| e0fb398b | 1532 | devtoname(i_dev), (long)bno, fs->fs_fsmnt); |
| 984263bc MD |
1533 | panic("ffs_blkfree: freeing free block"); |
| 1534 | } | |
| 1535 | ffs_setblock(fs, blksfree, blkno); | |
| 1536 | ffs_clusteracct(fs, cgp, blkno, 1); | |
| 1537 | cgp->cg_cs.cs_nbfree++; | |
| 1538 | fs->fs_cstotal.cs_nbfree++; | |
| 1539 | fs->fs_cs(fs, cg).cs_nbfree++; | |
| 1540 | i = cbtocylno(fs, bno); | |
| 1541 | cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; | |
| 1542 | cg_blktot(cgp)[i]++; | |
| 1543 | } else { | |
| 984263bc | 1544 | /* |
| 5d5d3444 MD |
1545 | * Free a fragment within a block. |
| 1546 | * | |
| 1547 | * bno is the starting block number of the fragment being | |
| 1548 | * freed. | |
| 1549 | * | |
| 1550 | * bbase is the starting block number for the filesystem | |
| 1551 | * block containing the fragment. | |
| 1552 | * | |
| 1553 | * blk is the current bitmap for the fragments within the | |
| 1554 | * filesystem block containing the fragment. | |
| 1555 | * | |
| 1556 | * frags is the number of fragments being freed | |
| 1557 | * | |
| 1558 | * Call ffs_fragacct() to account for the removal of all | |
| 1559 | * current fragments, then adjust the bitmap to free the | |
| 1560 | * requested fragment, and finally call ffs_fragacct() again | |
| 1561 | * to regenerate the accounting. | |
| 984263bc | 1562 | */ |
| 5d5d3444 | 1563 | bbase = bno - fragnum(fs, bno); |
| 984263bc MD |
1564 | blk = blkmap(fs, blksfree, bbase); |
| 1565 | ffs_fragacct(fs, blk, cgp->cg_frsum, -1); | |
| 984263bc MD |
1566 | frags = numfrags(fs, size); |
| 1567 | for (i = 0; i < frags; i++) { | |
| 1568 | if (isset(blksfree, bno + i)) { | |
| 086c1d7e | 1569 | kprintf("dev = %s, block = %ld, fs = %s\n", |
| e0fb398b | 1570 | devtoname(i_dev), (long)(bno + i), |
| 984263bc MD |
1571 | fs->fs_fsmnt); |
| 1572 | panic("ffs_blkfree: freeing free frag"); | |
| 1573 | } | |
| 1574 | setbit(blksfree, bno + i); | |
| 1575 | } | |
| 1576 | cgp->cg_cs.cs_nffree += i; | |
| 1577 | fs->fs_cstotal.cs_nffree += i; | |
| 1578 | fs->fs_cs(fs, cg).cs_nffree += i; | |
| 5d5d3444 | 1579 | |
| 984263bc | 1580 | /* |
| 5d5d3444 | 1581 | * Add back in counts associated with the new frags |
| 984263bc MD |
1582 | */ |
| 1583 | blk = blkmap(fs, blksfree, bbase); | |
| 1584 | ffs_fragacct(fs, blk, cgp->cg_frsum, 1); | |
| 5d5d3444 | 1585 | |
| 984263bc | 1586 | /* |
| 5d5d3444 | 1587 | * If a complete block has been reassembled, account for it |
| 984263bc MD |
1588 | */ |
| 1589 | blkno = fragstoblks(fs, bbase); | |
| 1590 | if (ffs_isblock(fs, blksfree, blkno)) { | |
| 1591 | cgp->cg_cs.cs_nffree -= fs->fs_frag; | |
| 1592 | fs->fs_cstotal.cs_nffree -= fs->fs_frag; | |
| 1593 | fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; | |
| 1594 | ffs_clusteracct(fs, cgp, blkno, 1); | |
| 1595 | cgp->cg_cs.cs_nbfree++; | |
| 1596 | fs->fs_cstotal.cs_nbfree++; | |
| 1597 | fs->fs_cs(fs, cg).cs_nbfree++; | |
| 1598 | i = cbtocylno(fs, bbase); | |
| 1599 | cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; | |
| 1600 | cg_blktot(cgp)[i]++; | |
| 1601 | } | |
| 1602 | } | |
| 1603 | fs->fs_fmod = 1; | |
| 1604 | bdwrite(bp); | |
| 1605 | } | |
| 1606 | ||
| e0fb398b T |
1607 | struct ffs_blkfree_trim_params { |
| 1608 | struct task task; | |
| 1609 | ufs_daddr_t bno; | |
| 1610 | long size; | |
| 1611 | ||
| 1612 | /* | |
| 1613 | * With TRIM, inode pointer is gone in the callback but we still need | |
| 1614 | * the following fields for ffs_blkfree_cg() | |
| 1615 | */ | |
| 1616 | struct vnode *i_devvp; | |
| 1617 | struct fs *i_fs; | |
| 1618 | cdev_t i_dev; | |
| 1619 | ino_t i_number; | |
| 1620 | uint32_t i_din_uid; | |
| 1621 | }; | |
| 1622 | ||
| 1623 | ||
| 1624 | static void | |
| 1625 | ffs_blkfree_trim_task(void *ctx, int pending) | |
| 1626 | { | |
| 1627 | struct ffs_blkfree_trim_params *tp; | |
| 1628 | ||
| 1629 | tp = ctx; | |
| 1630 | ffs_blkfree_cg(tp->i_fs, tp->i_devvp, tp->i_dev, tp->i_number, | |
| 1631 | tp->i_din_uid, tp->bno, tp->size); | |
| 1632 | kfree(tp, M_TEMP); | |
| 1633 | } | |
| 1634 | ||
| 1635 | ||
| 1636 | ||
| 1637 | static void | |
| 1638 | ffs_blkfree_trim_completed(struct bio *biop) | |
| 1639 | { | |
| 1640 | struct buf *bp = biop->bio_buf; | |
| 1641 | struct ffs_blkfree_trim_params *tp; | |
| 1642 | ||
| 1643 | tp = bp->b_bio1.bio_caller_info1.ptr; | |
| 1644 | TASK_INIT(&tp->task, 0, ffs_blkfree_trim_task, tp); | |
| 1645 | tp = biop->bio_caller_info1.ptr; | |
| 1646 | taskqueue_enqueue(taskqueue_swi, &tp->task); | |
| 1647 | biodone(biop); | |
| 1648 | } | |
| 1649 | ||
| 1650 | ||
| 1651 | /* | |
| 1652 | * If TRIM is enabled, we TRIM the blocks first then free them. We do this | |
| 1653 | * after TRIM is finished and the callback handler is called. The logic here | |
| 1654 | * is that we free the blocks before updating the bitmap so that we don't | |
| 1655 | * reuse a block before we actually trim it, which would result in trimming | |
| 1656 | * a valid block. | |
| 1657 | */ | |
| 1658 | void | |
| 1659 | ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size) | |
| 1660 | { | |
| 1661 | struct ufsmount *ump = VFSTOUFS(ITOV(ip)->v_mount);; | |
| 1662 | struct ffs_blkfree_trim_params *tp; | |
| 1663 | ||
| 1664 | if (!(ump->um_mountp->mnt_flag & MNT_TRIM)) { | |
| 1665 | ffs_blkfree_cg(ip->i_fs, ip->i_devvp,ip->i_dev,ip->i_number, | |
| 1666 | ip->i_uid, bno, size); | |
| 1667 | return; | |
| 1668 | } | |
| 1669 | ||
| 1670 | struct buf *bp; | |
| 1671 | ||
| 1672 | tp = kmalloc(sizeof(struct ffs_blkfree_trim_params), M_TEMP, M_WAITOK); | |
| 1673 | tp->bno = bno; | |
| 1674 | tp->i_fs= ip->i_fs; | |
| 1675 | tp->i_devvp = ip->i_devvp; | |
| 1676 | tp->i_dev = ip->i_dev; | |
| 1677 | tp->i_din_uid = ip->i_uid; | |
| 1678 | tp->i_number = ip->i_number; | |
| 1679 | tp->size = size; | |
| 1680 | ||
| 1681 | bp = getnewbuf(0,0,0,1); | |
| 1682 | BUF_KERNPROC(bp); | |
| 1683 | bp->b_cmd = BUF_CMD_FREEBLKS; | |
| 1684 | bp->b_bio1.bio_offset = fsbtodoff(ip->i_fs, bno); | |
| 1685 | bp->b_bcount = size; | |
| 1686 | bp->b_bio1.bio_caller_info1.ptr = tp; | |
| 1687 | bp->b_bio1.bio_done = ffs_blkfree_trim_completed; | |
| 1688 | vn_strategy(ip->i_devvp, &bp->b_bio1); | |
| 1689 | } | |
| 1690 | ||
| 984263bc MD |
1691 | #ifdef DIAGNOSTIC |
| 1692 | /* | |
| 1693 | * Verify allocation of a block or fragment. Returns true if block or | |
| 1694 | * fragment is allocated, false if it is free. | |
| 1695 | */ | |
| 1696 | static int | |
| 0973c589 | 1697 | ffs_checkblk(struct inode *ip, ufs_daddr_t bno, long size) |
| 984263bc MD |
1698 | { |
| 1699 | struct fs *fs; | |
| 1700 | struct cg *cgp; | |
| 1701 | struct buf *bp; | |
| 1702 | int i, error, frags, free; | |
| f719c866 | 1703 | uint8_t *blksfree; |
| 984263bc MD |
1704 | |
| 1705 | fs = ip->i_fs; | |
| f719c866 | 1706 | if ((uint)size > fs->fs_bsize || fragoff(fs, size) != 0) { |
| 086c1d7e | 1707 | kprintf("bsize = %ld, size = %ld, fs = %s\n", |
| 984263bc MD |
1708 | (long)fs->fs_bsize, size, fs->fs_fsmnt); |
| 1709 | panic("ffs_checkblk: bad size"); | |
| 1710 | } | |
| f719c866 | 1711 | if ((uint)bno >= fs->fs_size) |
| 984263bc | 1712 | panic("ffs_checkblk: bad block %d", bno); |
| 54078292 MD |
1713 | error = bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, dtog(fs, bno))), |
| 1714 | (int)fs->fs_cgsize, &bp); | |
| 984263bc MD |
1715 | if (error) |
| 1716 | panic("ffs_checkblk: cg bread failed"); | |
| 1717 | cgp = (struct cg *)bp->b_data; | |
| 1718 | if (!cg_chkmagic(cgp)) | |
| 1719 | panic("ffs_checkblk: cg magic mismatch"); | |
| 984263bc MD |
1720 | blksfree = cg_blksfree(cgp); |
| 1721 | bno = dtogd(fs, bno); | |
| 1722 | if (size == fs->fs_bsize) { | |
| 1723 | free = ffs_isblock(fs, blksfree, fragstoblks(fs, bno)); | |
| 1724 | } else { | |
| 1725 | frags = numfrags(fs, size); | |
| 1726 | for (free = 0, i = 0; i < frags; i++) | |
| 1727 | if (isset(blksfree, bno + i)) | |
| 1728 | free++; | |
| 1729 | if (free != 0 && free != frags) | |
| 1730 | panic("ffs_checkblk: partially free fragment"); | |
| 1731 | } | |
| 1732 | brelse(bp); | |
| 1733 | return (!free); | |
| 1734 | } | |
| 1735 | #endif /* DIAGNOSTIC */ | |
| 1736 | ||
| 1737 | /* | |
| 1738 | * Free an inode. | |
| 1739 | */ | |
| 1740 | int | |
| 0973c589 | 1741 | ffs_vfree(struct vnode *pvp, ino_t ino, int mode) |
| 984263bc MD |
1742 | { |
| 1743 | if (DOINGSOFTDEP(pvp)) { | |
| 1744 | softdep_freefile(pvp, ino, mode); | |
| 1745 | return (0); | |
| 1746 | } | |
| 1747 | return (ffs_freefile(pvp, ino, mode)); | |
| 1748 | } | |
| 1749 | ||
| 1750 | /* | |
| 1751 | * Do the actual free operation. | |
| 1752 | * The specified inode is placed back in the free map. | |
| 1753 | */ | |
| 0973c589 CP |
1754 | int |
| 1755 | ffs_freefile(struct vnode *pvp, ino_t ino, int mode) | |
| 984263bc | 1756 | { |
| 3ff2135f RG |
1757 | struct fs *fs; |
| 1758 | struct cg *cgp; | |
| 1759 | struct inode *pip; | |
| 984263bc MD |
1760 | struct buf *bp; |
| 1761 | int error, cg; | |
| f719c866 | 1762 | uint8_t *inosused; |
| 984263bc MD |
1763 | |
| 1764 | pip = VTOI(pvp); | |
| 1765 | fs = pip->i_fs; | |
| f719c866 | 1766 | if ((uint)ino >= fs->fs_ipg * fs->fs_ncg) |
| f91a71dd | 1767 | panic("ffs_vfree: range: dev = (%d,%d), ino = %"PRId64", fs = %s", |
| 984263bc MD |
1768 | major(pip->i_dev), minor(pip->i_dev), ino, fs->fs_fsmnt); |
| 1769 | cg = ino_to_cg(fs, ino); | |
| 54078292 MD |
1770 | error = bread(pip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)), |
| 1771 | (int)fs->fs_cgsize, &bp); | |
| 984263bc MD |
1772 | if (error) { |
| 1773 | brelse(bp); | |
| 1774 | return (error); | |
| 1775 | } | |
| 1776 | cgp = (struct cg *)bp->b_data; | |
| 1777 | if (!cg_chkmagic(cgp)) { | |
| 1778 | brelse(bp); | |
| 1779 | return (0); | |
| 1780 | } | |
| 984263bc MD |
1781 | cgp->cg_time = time_second; |
| 1782 | inosused = cg_inosused(cgp); | |
| 1783 | ino %= fs->fs_ipg; | |
| 1784 | if (isclr(inosused, ino)) { | |
| 086c1d7e | 1785 | kprintf("dev = %s, ino = %lu, fs = %s\n", |
| 984263bc MD |
1786 | devtoname(pip->i_dev), (u_long)ino, fs->fs_fsmnt); |
| 1787 | if (fs->fs_ronly == 0) | |
| 1788 | panic("ffs_vfree: freeing free inode"); | |
| 1789 | } | |
| 1790 | clrbit(inosused, ino); | |
| 1791 | if (ino < cgp->cg_irotor) | |
| 1792 | cgp->cg_irotor = ino; | |
| 1793 | cgp->cg_cs.cs_nifree++; | |
| 1794 | fs->fs_cstotal.cs_nifree++; | |
| 1795 | fs->fs_cs(fs, cg).cs_nifree++; | |
| 1796 | if ((mode & IFMT) == IFDIR) { | |
| 1797 | cgp->cg_cs.cs_ndir--; | |
| 1798 | fs->fs_cstotal.cs_ndir--; | |
| 1799 | fs->fs_cs(fs, cg).cs_ndir--; | |
| 1800 | } | |
| 1801 | fs->fs_fmod = 1; | |
| 1802 | bdwrite(bp); | |
| 1803 | return (0); | |
| 1804 | } | |
| 1805 | ||
| 1806 | /* | |
| 1807 | * Find a block of the specified size in the specified cylinder group. | |
| 1808 | * | |
| 1809 | * It is a panic if a request is made to find a block if none are | |
| 1810 | * available. | |
| 1811 | */ | |
| 1812 | static ufs_daddr_t | |
| 0973c589 | 1813 | ffs_mapsearch(struct fs *fs, struct cg *cgp, ufs_daddr_t bpref, int allocsiz) |
| 984263bc MD |
1814 | { |
| 1815 | ufs_daddr_t bno; | |
| 1816 | int start, len, loc, i; | |
| 1817 | int blk, field, subfield, pos; | |
| f719c866 | 1818 | uint8_t *blksfree; |
| 984263bc MD |
1819 | |
| 1820 | /* | |
| 1821 | * find the fragment by searching through the free block | |
| 5d5d3444 | 1822 | * map for an appropriate bit pattern. |
| 984263bc MD |
1823 | */ |
| 1824 | if (bpref) | |
| 1825 | start = dtogd(fs, bpref) / NBBY; | |
| 1826 | else | |
| 1827 | start = cgp->cg_frotor / NBBY; | |
| 1828 | blksfree = cg_blksfree(cgp); | |
| 1829 | len = howmany(fs->fs_fpg, NBBY) - start; | |
| f719c866 | 1830 | loc = scanc((uint)len, (u_char *)&blksfree[start], |
| 984263bc MD |
1831 | (u_char *)fragtbl[fs->fs_frag], |
| 1832 | (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); | |
| 1833 | if (loc == 0) { | |
| 5d5d3444 | 1834 | len = start + 1; /* XXX why overlap here? */ |
| 984263bc | 1835 | start = 0; |
| f719c866 | 1836 | loc = scanc((uint)len, (u_char *)&blksfree[0], |
| 984263bc MD |
1837 | (u_char *)fragtbl[fs->fs_frag], |
| 1838 | (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); | |
| 1839 | if (loc == 0) { | |
| 086c1d7e | 1840 | kprintf("start = %d, len = %d, fs = %s\n", |
| 984263bc MD |
1841 | start, len, fs->fs_fsmnt); |
| 1842 | panic("ffs_alloccg: map corrupted"); | |
| 1843 | /* NOTREACHED */ | |
| 1844 | } | |
| 1845 | } | |
| 1846 | bno = (start + len - loc) * NBBY; | |
| 1847 | cgp->cg_frotor = bno; | |
| 1848 | /* | |
| 1849 | * found the byte in the map | |
| 1850 | * sift through the bits to find the selected frag | |
| 1851 | */ | |
| 1852 | for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { | |
| 1853 | blk = blkmap(fs, blksfree, bno); | |
| 1854 | blk <<= 1; | |
| 1855 | field = around[allocsiz]; | |
| 1856 | subfield = inside[allocsiz]; | |
| 1857 | for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { | |
| 1858 | if ((blk & field) == subfield) | |
| 1859 | return (bno + pos); | |
| 1860 | field <<= 1; | |
| 1861 | subfield <<= 1; | |
| 1862 | } | |
| 1863 | } | |
| 086c1d7e | 1864 | kprintf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); |
| 984263bc MD |
1865 | panic("ffs_alloccg: block not in map"); |
| 1866 | return (-1); | |
| 1867 | } | |
| 1868 | ||
| 1869 | /* | |
| 1870 | * Update the cluster map because of an allocation or free. | |
| 1871 | * | |
| 1872 | * Cnt == 1 means free; cnt == -1 means allocating. | |
| 1873 | */ | |
| 1874 | static void | |
| 0973c589 | 1875 | ffs_clusteracct(struct fs *fs, struct cg *cgp, ufs_daddr_t blkno, int cnt) |
| 984263bc MD |
1876 | { |
| 1877 | int32_t *sump; | |
| 1878 | int32_t *lp; | |
| 1879 | u_char *freemapp, *mapp; | |
| 1880 | int i, start, end, forw, back, map, bit; | |
| 1881 | ||
| 1882 | if (fs->fs_contigsumsize <= 0) | |
| 1883 | return; | |
| 1884 | freemapp = cg_clustersfree(cgp); | |
| 1885 | sump = cg_clustersum(cgp); | |
| 1886 | /* | |
| 1887 | * Allocate or clear the actual block. | |
| 1888 | */ | |
| 1889 | if (cnt > 0) | |
| 1890 | setbit(freemapp, blkno); | |
| 1891 | else | |
| 1892 | clrbit(freemapp, blkno); | |
| 1893 | /* | |
| 1894 | * Find the size of the cluster going forward. | |
| 1895 | */ | |
| 1896 | start = blkno + 1; | |
| 1897 | end = start + fs->fs_contigsumsize; | |
| 1898 | if (end >= cgp->cg_nclusterblks) | |
| 1899 | end = cgp->cg_nclusterblks; | |
| 1900 | mapp = &freemapp[start / NBBY]; | |
| 1901 | map = *mapp++; | |
| 1902 | bit = 1 << (start % NBBY); | |
| 1903 | for (i = start; i < end; i++) { | |
| 1904 | if ((map & bit) == 0) | |
| 1905 | break; | |
| 1906 | if ((i & (NBBY - 1)) != (NBBY - 1)) { | |
| 1907 | bit <<= 1; | |
| 1908 | } else { | |
| 1909 | map = *mapp++; | |
| 1910 | bit = 1; | |
| 1911 | } | |
| 1912 | } | |
| 1913 | forw = i - start; | |
| 1914 | /* | |
| 1915 | * Find the size of the cluster going backward. | |
| 1916 | */ | |
| 1917 | start = blkno - 1; | |
| 1918 | end = start - fs->fs_contigsumsize; | |
| 1919 | if (end < 0) | |
| 1920 | end = -1; | |
| 1921 | mapp = &freemapp[start / NBBY]; | |
| 1922 | map = *mapp--; | |
| 1923 | bit = 1 << (start % NBBY); | |
| 1924 | for (i = start; i > end; i--) { | |
| 1925 | if ((map & bit) == 0) | |
| 1926 | break; | |
| 1927 | if ((i & (NBBY - 1)) != 0) { | |
| 1928 | bit >>= 1; | |
| 1929 | } else { | |
| 1930 | map = *mapp--; | |
| 1931 | bit = 1 << (NBBY - 1); | |
| 1932 | } | |
| 1933 | } | |
| 1934 | back = start - i; | |
| 1935 | /* | |
| 1936 | * Account for old cluster and the possibly new forward and | |
| 1937 | * back clusters. | |
| 1938 | */ | |
| 1939 | i = back + forw + 1; | |
| 1940 | if (i > fs->fs_contigsumsize) | |
| 1941 | i = fs->fs_contigsumsize; | |
| 1942 | sump[i] += cnt; | |
| 1943 | if (back > 0) | |
| 1944 | sump[back] -= cnt; | |
| 1945 | if (forw > 0) | |
| 1946 | sump[forw] -= cnt; | |
| 1947 | /* | |
| 1948 | * Update cluster summary information. | |
| 1949 | */ | |
| 1950 | lp = &sump[fs->fs_contigsumsize]; | |
| 1951 | for (i = fs->fs_contigsumsize; i > 0; i--) | |
| 1952 | if (*lp-- > 0) | |
| 1953 | break; | |
| 1954 | fs->fs_maxcluster[cgp->cg_cgx] = i; | |
| 1955 | } | |
| 1956 | ||
| 1957 | /* | |
| f719c866 | 1958 | * Fserr prints the name of a filesystem with an error diagnostic. |
| 984263bc MD |
1959 | * |
| 1960 | * The form of the error message is: | |
| 1961 | * fs: error message | |
| 1962 | */ | |
| 1963 | static void | |
| f719c866 | 1964 | ffs_fserr(struct fs *fs, uint uid, char *cp) |
| 984263bc | 1965 | { |
| 05edc21a MD |
1966 | struct thread *td = curthread; |
| 1967 | struct proc *p; | |
| 984263bc | 1968 | |
| 05edc21a MD |
1969 | if ((p = td->td_proc) != NULL) { |
| 1970 | log(LOG_ERR, "pid %d (%s), uid %d on %s: %s\n", p ? p->p_pid : -1, | |
| 1971 | p ? p->p_comm : "-", uid, fs->fs_fsmnt, cp); | |
| 1972 | } else { | |
| 1973 | log(LOG_ERR, "system thread %p, uid %d on %s: %s\n", | |
| 1974 | td, uid, fs->fs_fsmnt, cp); | |
| 1975 | } | |
| 984263bc | 1976 | } |