2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * HAMMER PFS ioctls - Manage pseudo-fs configurations
/*
 * Forward declarations for the file-local (static) helpers that are
 * defined later in this file: PFS id auto-detection, PFS rollback, and
 * the per-element rollback helper.
 */
40 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
42 static int hammer_pfs_rollback(hammer_transaction_t trans,
43 hammer_pseudofs_inmem_t pfsm,
44 hammer_tid_t trunc_tid);
45 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
46 hammer_tid_t trunc_tid);
49 * Get mirroring/pseudo-fs information
51 * NOTE: The ip used for ioctl is not necessarily related to the PFS
52 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
/*
 * hammer_ioc_get_pseudofs: report the configuration of one PFS.
 *
 * trans - transaction context; trans->hmp supplies flush_tid1 and is
 *         passed to hammer_rel_pseudofs().
 * ip    - inode the ioctl was issued on; only handed to
 *         hammer_pfs_autodetect() to resolve the PFS id.
 * pfs   - ioctl argument.  pfs->bytes and pfs->version are filled in,
 *         and when pfs->ondisk is set the PFS data is copied out to it.
 *
 * Side effect: for a PFS that does not have HAMMER_PFSD_SLAVE set, the
 * in-memory sync_end_tid is overwritten with hmp->flush_tid1 before the
 * copy-out.
 *
 * NOTE(review): the return type, braces, the declaration of error and
 * the return statements are not visible in this view, so the exact
 * control flow around the two hammer_rel_pseudofs() calls should be
 * confirmed against the complete source.
 */
55 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
56 struct hammer_ioc_pseudofs_rw *pfs)
58 hammer_pseudofs_inmem_t pfsm;
59 uint32_t localization;
/* Resolve and bounds-check pfs->pfs_id first. */
62 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
64 localization = pfs_to_lo(pfs->pfs_id);
/* Tell the caller the structure size and ioctl version in use. */
65 pfs->bytes = sizeof(struct hammer_pseudofs_data);
66 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
/* Fetch the in-memory PFS record; error is set through the pointer. */
68 pfsm = hammer_load_pseudofs(trans, localization, &error);
70 hammer_rel_pseudofs(trans->hmp, pfsm);
75 * If the PFS is a master the sync tid is set by normal operation
76 * rather than the mirroring code, and will always track the
77 * real HAMMER filesystem.
79 * We use flush_tid1, which is the highest fully committed TID.
80 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
81 * caught up to it yet so a crash will roll us back to flush_tid1.
83 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
84 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;
87 * Copy out to userland.
90 if (pfs->ondisk && error == 0)
91 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
92 hammer_rel_pseudofs(trans->hmp, pfsm);
97 * Set mirroring/pseudo-fs information
99 * NOTE: The ip used for ioctl is not necessarily related to the PFS
100 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
/*
 * hammer_ioc_set_pseudofs: install new configuration data for one PFS.
 *
 * trans - transaction context.
 * ip    - inode the ioctl was issued on; used for PFS id auto-detection
 *         and for the root-PFS check (lo_to_pfs(ip->obj_localization)
 *         must be 0).
 * cred  - credentials passed to hammer_mkroot_pseudofs().
 * pfs   - ioctl argument; pfs->version must equal
 *         HAMMER_IOC_PSEUDOFS_VERSION and pfs->ondisk points at the
 *         new hammer_pseudofs_data in user memory.
 *
 * When no error is pending and pfs->ondisk is set, the in-memory PFS
 * record is loaded, overwritten from userland with copyin(), saved with
 * hammer_save_pseudofs(), and sleepers on sync_end_tid are woken.
 *
 * NOTE(review): the error stored by hammer_load_pseudofs() is
 * overwritten by the copyin() result on the very next statement; the
 * existing comment indicates that ignoring the load error is deliberate,
 * but its wording is cut off in this view - confirm.
 * NOTE(review): the first half of the condition guarding
 * hammer_mkroot_pseudofs(), the error assignments after the version and
 * root-PFS checks, and the return statements are on lines not visible
 * here.
 */
103 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
104 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
106 hammer_pseudofs_inmem_t pfsm;
107 uint32_t localization;
/* Resolve and bounds-check pfs->pfs_id first. */
110 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
112 localization = pfs_to_lo(pfs->pfs_id);
/* Reject callers built against a different ioctl structure version. */
113 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
117 * Make sure a caller isn't creating a PFS from non-root PFS.
119 if (lo_to_pfs(ip->obj_localization) != 0) {
120 hmkprintf(trans->hmp,
121 "Creating a PFS from non-root PFS is not allowed\n");
125 if (error == 0 && pfs->ondisk) {
127 * Load the PFS so we can modify our in-core copy. Ignore
130 pfsm = hammer_load_pseudofs(trans, localization, &error);
131 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
134 * Save it back, create a root inode if we are in master
135 * mode and no root exists.
137 * We do not create root inodes for slaves, the root inode
138 * must be mirrored from the master.
141 (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
142 error = hammer_mkroot_pseudofs(trans, cred, pfsm);
145 error = hammer_save_pseudofs(trans, pfsm);
148 * Wakeup anyone waiting for a TID update for this PFS
150 wakeup(&pfsm->pfsd.sync_end_tid);
151 hammer_rel_pseudofs(trans->hmp, pfsm);
157 * Upgrade a slave to a master
159 * This is fairly easy to do, but we must physically undo any partial syncs
160 * for transaction ids > sync_end_tid. Effectively, we must do a partial
163 * NOTE: The ip used for ioctl is not necessarily related to the PFS
164 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
/*
 * hammer_ioc_upgrade_pseudofs: turn a slave PFS into a master.
 *
 * trans - transaction context.
 * ip    - inode the ioctl was issued on; used only for PFS id
 *         auto-detection.
 * pfs   - ioctl argument; pfs->head.flags receives
 *         HAMMER_IOC_HEAD_INTR when the operation ends with EINTR.
 *
 * The cached PFS is dropped with hammer_unload_pseudofs() and loaded
 * again.  If HAMMER_PFSD_SLAVE is set, everything at or above
 * sync_end_tid + 1 is rolled back, the SLAVE flag is cleared and the
 * record is saved.
 *
 * NOTE(review): the error checks between these steps and the return
 * statements are on lines not visible in this view.
 */
167 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
168 struct hammer_ioc_pseudofs_rw *pfs)
170 hammer_pseudofs_inmem_t pfsm;
171 uint32_t localization;
/* Resolve and bounds-check pfs->pfs_id first. */
174 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
176 localization = pfs_to_lo(pfs->pfs_id);
/* Drop any cached in-memory copy before reloading it below. */
177 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
181 * A master id must be set when upgrading
183 pfsm = hammer_load_pseudofs(trans, localization, &error);
185 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
/* Remove partial syncs: trunc_tid is the first TID to discard. */
186 error = hammer_pfs_rollback(trans, pfsm,
187 pfsm->pfsd.sync_end_tid + 1);
189 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
190 error = hammer_save_pseudofs(trans, pfsm);
194 hammer_rel_pseudofs(trans->hmp, pfsm);
/* Report an interrupted operation through the ioctl header flags. */
195 if (error == EINTR) {
196 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
203 * Downgrade a master to a slave
205 * This is really easy to do, just set the SLAVE flag and update sync_end_tid.
207 * We previously did not update sync_end_tid in consideration for a slave
208 * upgraded to a master and then downgraded again, but this completely breaks
209 * the case where one starts with a master and then downgrades to a slave,
210 * then upgrades again.
212 * NOTE: The ip used for ioctl is not necessarily related to the PFS
213 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
/*
 * hammer_ioc_downgrade_pseudofs: turn a master PFS into a slave.
 *
 * trans - transaction context; trans->hmp supplies flush_tid1.
 * ip    - inode the ioctl was issued on; used only for PFS id
 *         auto-detection.
 * pfs   - ioctl argument carrying the PFS id.
 *
 * The cached PFS is dropped and reloaded.  If HAMMER_PFSD_SLAVE is not
 * yet set it is set, sync_end_tid is raised to hmp->flush_tid1 when it
 * is lower, and the record is saved.  A PFS that is already a slave is
 * left unmodified by the visible code.
 *
 * NOTE(review): the error checks and return statements are on lines
 * not visible in this view.
 */
216 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
217 struct hammer_ioc_pseudofs_rw *pfs)
219 hammer_mount_t hmp = trans->hmp;
220 hammer_pseudofs_inmem_t pfsm;
221 uint32_t localization;
/* Resolve and bounds-check pfs->pfs_id first. */
224 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
226 localization = pfs_to_lo(pfs->pfs_id);
/* Drop any cached in-memory copy before reloading it below. */
227 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
230 pfsm = hammer_load_pseudofs(trans, localization, &error);
232 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
233 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
/* Only ever move sync_end_tid forward, never backward. */
234 if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1)
235 pfsm->pfsd.sync_end_tid = hmp->flush_tid1;
236 error = hammer_save_pseudofs(trans, pfsm);
239 hammer_rel_pseudofs(trans->hmp, pfsm);
246 * We can destroy a PFS by scanning and deleting all of its records in the
247 * B-Tree. The hammer utility will delete the softlink in the primary
250 * NOTE: The ip used for ioctl is not necessarily related to the PFS
251 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
/*
 * hammer_ioc_destroy_pseudofs: destroy the contents of one PFS.
 *
 * trans - transaction context.
 * ip    - inode the ioctl was issued on; used only for PFS id
 *         auto-detection.
 * pfs   - ioctl argument; pfs->head.flags receives
 *         HAMMER_IOC_HEAD_INTR when the operation ends with EINTR.
 *
 * The cached PFS is dropped and reloaded, hammer_pfs_rollback() is run
 * with trunc_tid 0, HAMMER_PFSD_DELETED is set in mirror_flags and the
 * record is saved.
 *
 * NOTE(review): the error checks between these steps and the return
 * statements are on lines not visible in this view.
 */
254 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
255 struct hammer_ioc_pseudofs_rw *pfs)
257 hammer_pseudofs_inmem_t pfsm;
258 uint32_t localization;
/* Resolve and bounds-check pfs->pfs_id first. */
261 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
263 localization = pfs_to_lo(pfs->pfs_id);
/* Drop any cached in-memory copy before reloading it below. */
265 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
268 pfsm = hammer_load_pseudofs(trans, localization, &error);
/* trunc_tid 0: every create_tid satisfies create_tid >= trunc_tid. */
270 error = hammer_pfs_rollback(trans, pfsm, 0);
272 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
273 error = hammer_save_pseudofs(trans, pfsm);
276 hammer_rel_pseudofs(trans->hmp, pfsm);
/* Report an interrupted operation through the ioctl header flags. */
277 if (error == EINTR) {
278 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
285 * Wait for the PFS to sync past the specified TID
/*
 * hammer_ioc_wait_pseudofs: sleep until a PFS may have advanced past a
 * caller-supplied TID.
 *
 * trans - transaction context; trans->hmp supplies flush_tid1.
 * ip    - inode the ioctl was issued on; used only for PFS id
 *         auto-detection.
 * pfs   - ioctl argument; pfs->ondisk is read with copyin() to obtain
 *         the sync_end_tid to wait for, and pfs->head.flags receives
 *         HAMMER_IOC_HEAD_INTR when error is EINTR.
 *
 * For a slave the wait channel and current TID are the sync_end_tid of
 * the PFS; otherwise they are hmp->flush_tid1.  The visible code sleeps
 * at most once (no loop), and only when the current TID is not yet
 * greater than the requested one.
 *
 * NOTE(review): the tsleep() return value is discarded, so on the
 * visible lines the EINTR test below can only see the error left by the
 * earlier calls - confirm whether a signal during the sleep is meant to
 * set HAMMER_IOC_HEAD_INTR.
 * NOTE(review): the declarations of tid, waitp and error, the else
 * line and the return statements are not visible in this view.
 */
288 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
289 struct hammer_ioc_pseudofs_rw *pfs)
291 hammer_pseudofs_inmem_t pfsm;
292 struct hammer_pseudofs_data pfsd;
293 uint32_t localization;
/* Resolve and bounds-check pfs->pfs_id first. */
298 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
300 localization = pfs_to_lo(pfs->pfs_id);
/* Fetch the caller copy; only its sync_end_tid is used below. */
302 if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
305 pfsm = hammer_load_pseudofs(trans, localization, &error);
/* Pick the TID and wait channel that match the PFS role. */
307 if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
308 tid = pfsm->pfsd.sync_end_tid;
309 waitp = &pfsm->pfsd.sync_end_tid;
311 tid = trans->hmp->flush_tid1;
312 waitp = &trans->hmp->flush_tid1;
/* Timeout argument 0; PCATCH is passed so signals can end the sleep. */
314 if (tid <= pfsd.sync_end_tid)
315 tsleep(waitp, PCATCH, "hmrmwt", 0);
317 hammer_rel_pseudofs(trans->hmp, pfsm);
318 if (error == EINTR) {
319 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
326 * Iterate PFS ondisk data.
327 * This function basically does the same as hammer_load_pseudofs()
328 * except that the purpose of this function is to retrieve data.
330 * NOTE: The ip used for ioctl is not necessarily related to the PFS
331 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
/*
 * hammer_ioc_iterate_pseudofs: look up the on-disk PFS record for
 * pi->pos and copy it out to userland.
 *
 * trans - transaction context.
 * ip    - inode the ioctl was issued on; used only for PFS id
 *         auto-detection.
 * pi    - ioctl argument; pi->pos is the PFS id on entry and is
 *         rewritten with the id taken from the record that was found;
 *         pi->ondisk receives the record data.
 *
 * A temporary hammer_ioc_pseudofs_rw is built so that the shared
 * hammer_pfs_autodetect() validation can be reused.  The lookup runs
 * as-of HAMMER_MAX_TID on the root object (HAMMER_OBJID_ROOT) with
 * record type HAMMER_RECTYPE_PFS, keyed by the PFS localization.
 *
 * NOTE(review): the copyout() return value is discarded on the visible
 * line, so a bad pi->ondisk address would not be reported - confirm
 * whether that is intended.
 * NOTE(review): the declarations of dip and error, the conditions
 * guarding the lookup steps and the return statements are not visible
 * in this view.
 */
334 hammer_ioc_iterate_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
335 struct hammer_ioc_pfs_iterate *pi)
337 struct hammer_cursor cursor;
338 struct hammer_ioc_pseudofs_rw pfs;
340 uint32_t localization;
344 * struct hammer_ioc_pfs_iterate was never necessary.
345 * This ioctl needs extra code only to do conversion.
346 * The name pi->pos is misleading, but it's been exposed
347 * to userspace header..
349 bzero(&pfs, sizeof(pfs));
350 pfs.pfs_id = pi->pos;
351 pfs.bytes = sizeof(struct hammer_pseudofs_data); /* dummy */
352 if ((error = hammer_pfs_autodetect(&pfs, ip)) != 0)
354 pi->pos = pfs.pfs_id;
355 localization = pfs_to_lo(pi->pos);
357 dip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID,
358 HAMMER_DEF_LOCALIZATION, 0, &error);
360 error = hammer_init_cursor(trans, &cursor,
361 (dip ? &dip->cache[1] : NULL), dip);
365 cursor.key_beg.localization = HAMMER_DEF_LOCALIZATION |
366 HAMMER_LOCALIZE_MISC;
367 cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
368 cursor.key_beg.create_tid = 0;
369 cursor.key_beg.delete_tid = 0;
370 cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS;
371 cursor.key_beg.obj_type = 0;
372 cursor.key_beg.key = localization;
373 cursor.asof = HAMMER_MAX_TID;
374 cursor.flags |= HAMMER_CURSOR_ASOF;
376 error = hammer_ip_lookup(&cursor);
378 error = hammer_ip_resolve_data(&cursor);
381 copyout(cursor.data, pi->ondisk, cursor.leaf->data_len);
382 localization = cursor.leaf->base.key;
383 pi->pos = lo_to_pfs(localization);
385 * Caller needs to increment pi->pos each time calling
386 * this ioctl. This ioctl only restores current PFS id.
391 hammer_done_cursor(&cursor);
393 hammer_rel_inode(dip, 0);
398 * Auto-detect the pseudofs and do basic bounds checking.
/*
 * hammer_pfs_autodetect: resolve and validate the PFS id of a request.
 *
 * pfs - ioctl argument; a pfs_id of -1 is replaced in place by the PFS
 *       id derived from ip->obj_localization.
 * ip  - inode the ioctl was issued on.
 *
 * The request is rejected when the resulting pfs_id is negative or not
 * below HAMMER_MAX_PFS, or when pfs->bytes is smaller than
 * struct hammer_pseudofs_data.  Callers in this file treat a non-zero
 * return as an error.
 *
 * NOTE(review): the error codes assigned by the two checks and the
 * return statement are on lines not visible in this view.
 */
402 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
/* -1 means: use the PFS that ip itself lives in. */
406 if (pfs->pfs_id == -1)
407 pfs->pfs_id = lo_to_pfs(ip->obj_localization);
408 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
/* The user buffer must be able to hold a full hammer_pseudofs_data. */
410 if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
416 * Rollback the specified PFS to (trunc_tid - 1), removing everything
417 * greater or equal to trunc_tid. The PFS must not have been in no-mirror
418 * mode or the MIRROR_FILTERED scan will not work properly.
420 * This is typically used to remove any partial syncs when upgrading a
421 * slave to a master. It can theoretically also be used to rollback
422 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
423 * PRUNED, and to points that are older only if they are on a retained
424 * (pruning softlink) boundary.
426 * Rollbacks destroy information. If you don't mind inode numbers changing
427 * a better way would be to cpdup a snapshot back onto the master.
/*
 * hammer_pfs_rollback: scan one PFS and undo every B-Tree leaf element
 * at or beyond trunc_tid.
 *
 * trans     - transaction context; trans->hmp is used for the signal
 *             check and for flusher throttling.
 * pfsm      - in-memory PFS; pfsm->localization bounds the scan.
 * trunc_tid - first TID to remove; it is also installed as the
 *             mirror_tid of the MIRROR_FILTERED cursor.
 *
 * The scan covers the whole key space of the PFS (key_cur up to the
 * HAMMER_MAX_* end key, end inclusive).  key_cur is refreshed from each
 * leaf element that is visited before hammer_pfs_delete_at_cursor() is
 * called on it.  While hammer_flusher_meta_halflimit() or
 * hammer_flusher_undo_exhausted() report pressure, the cursor is
 * unlocked and the function waits for the flusher before continuing.
 *
 * NOTE(review): the loop and retry structure (including what happens
 * on EDEADLK), the declarations of seq and error, and the return
 * statement are on lines not visible in this view; presumably key_cur
 * is the restart position after a retry - confirm against the complete
 * source.
 */
431 hammer_pfs_rollback(hammer_transaction_t trans,
432 hammer_pseudofs_inmem_t pfsm,
433 hammer_tid_t trunc_tid)
435 struct hammer_cmirror cmirror;
436 struct hammer_cursor cursor;
437 struct hammer_base_elm key_cur;
/* Start key: the lowest possible element inside this PFS. */
441 bzero(&cmirror, sizeof(cmirror));
442 bzero(&key_cur, sizeof(key_cur));
443 key_cur.localization = HAMMER_MIN_LOCALIZATION | pfsm->localization;
444 key_cur.obj_id = HAMMER_MIN_OBJID;
445 key_cur.key = HAMMER_MIN_KEY;
446 key_cur.create_tid = 1;
447 key_cur.rec_type = HAMMER_MIN_RECTYPE;
/* Flusher sequence number handed to hammer_flusher_wait() below. */
449 seq = trans->hmp->flusher.done;
452 error = hammer_init_cursor(trans, &cursor, NULL, NULL);
454 hammer_done_cursor(&cursor);
457 cursor.key_beg = key_cur;
458 cursor.key_end.localization = HAMMER_MAX_LOCALIZATION |
460 cursor.key_end.obj_id = HAMMER_MAX_OBJID;
461 cursor.key_end.key = HAMMER_MAX_KEY;
462 cursor.key_end.create_tid = HAMMER_MAX_TID;
463 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
465 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
466 cursor.flags |= HAMMER_CURSOR_BACKEND;
469 * Do an optimized scan of only records created or modified
470 * >= trunc_tid, so we can fix up those records. We must
471 * still check the TIDs but this greatly reduces the size of
474 cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
475 cursor.cmirror = &cmirror;
476 cmirror.mirror_tid = trunc_tid;
478 error = hammer_btree_first(&cursor);
481 * Abort the rollback.
484 error = hammer_signal_check(trans->hmp);
490 * We only care about leafs. Internal nodes can be returned
491 * in mirror-filtered mode (they are used to generate SKIP
492 * mrecords), but we don't need them for this code.
494 * WARNING: See warnings in hammer_unlock_cursor() function.
496 cursor.flags |= HAMMER_CURSOR_ATEDISK;
497 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
498 key_cur = cursor.node->ondisk->elms[cursor.index].base;
499 error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
502 while (hammer_flusher_meta_halflimit(trans->hmp) ||
503 hammer_flusher_undo_exhausted(trans, 2)) {
504 hammer_unlock_cursor(&cursor);
505 hammer_flusher_wait(trans->hmp, seq);
506 hammer_lock_cursor(&cursor);
507 seq = hammer_flusher_async_one(trans->hmp);
511 error = hammer_btree_iterate(&cursor);
515 hammer_done_cursor(&cursor);
516 if (error == EDEADLK)
523 * Helper function - perform rollback on a B-Tree element given trunc_tid.
525 * If create_tid >= trunc_tid the record is physically destroyed.
526 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
530 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
532 hammer_btree_leaf_elm_t elm;
535 elm = &cursor->node->ondisk->elms[cursor->index].leaf;
536 if (elm->base.create_tid < trunc_tid &&
537 elm->base.delete_tid < trunc_tid) {
541 if (elm->base.create_tid >= trunc_tid) {
542 error = hammer_delete_at_cursor(
543 cursor, HAMMER_DELETE_DESTROY,
544 cursor->trans->tid, cursor->trans->time32,
546 } else if (elm->base.delete_tid >= trunc_tid) {
547 error = hammer_delete_at_cursor(
548 cursor, HAMMER_DELETE_ADJUST,