e0551cbd001e6afeeded8fdf14a82a3a8936d149
[dragonfly.git] / sys / vfs / hammer / hammer_vnops.c
1 /*
2  * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.46 2008/05/03 20:21:20 dillon Exp $
35  */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/namecache.h>
42 #include <sys/vnode.h>
43 #include <sys/lockf.h>
44 #include <sys/event.h>
45 #include <sys/stat.h>
46 #include <sys/dirent.h>
47 #include <vm/vm_extern.h>
48 #include <vfs/fifofs/fifo.h>
49 #include "hammer.h"
50
51 /*
52  * USERFS VNOPS
53  */
54 /*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
55 static int hammer_vop_fsync(struct vop_fsync_args *);
56 static int hammer_vop_read(struct vop_read_args *);
57 static int hammer_vop_write(struct vop_write_args *);
58 static int hammer_vop_access(struct vop_access_args *);
59 static int hammer_vop_advlock(struct vop_advlock_args *);
60 static int hammer_vop_close(struct vop_close_args *);
61 static int hammer_vop_ncreate(struct vop_ncreate_args *);
62 static int hammer_vop_getattr(struct vop_getattr_args *);
63 static int hammer_vop_nresolve(struct vop_nresolve_args *);
64 static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *);
65 static int hammer_vop_nlink(struct vop_nlink_args *);
66 static int hammer_vop_nmkdir(struct vop_nmkdir_args *);
67 static int hammer_vop_nmknod(struct vop_nmknod_args *);
68 static int hammer_vop_open(struct vop_open_args *);
69 static int hammer_vop_pathconf(struct vop_pathconf_args *);
70 static int hammer_vop_print(struct vop_print_args *);
71 static int hammer_vop_readdir(struct vop_readdir_args *);
72 static int hammer_vop_readlink(struct vop_readlink_args *);
73 static int hammer_vop_nremove(struct vop_nremove_args *);
74 static int hammer_vop_nrename(struct vop_nrename_args *);
75 static int hammer_vop_nrmdir(struct vop_nrmdir_args *);
76 static int hammer_vop_setattr(struct vop_setattr_args *);
77 static int hammer_vop_strategy(struct vop_strategy_args *);
78 static int hammer_vop_nsymlink(struct vop_nsymlink_args *);
79 static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
80 static int hammer_vop_ioctl(struct vop_ioctl_args *);
81 static int hammer_vop_mountctl(struct vop_mountctl_args *);
82
83 static int hammer_vop_fifoclose (struct vop_close_args *);
84 static int hammer_vop_fiforead (struct vop_read_args *);
85 static int hammer_vop_fifowrite (struct vop_write_args *);
86
87 static int hammer_vop_specclose (struct vop_close_args *);
88 static int hammer_vop_specread (struct vop_read_args *);
89 static int hammer_vop_specwrite (struct vop_write_args *);
90
/*
 * Vnode operations vector for regular HAMMER files and directories.
 * Operations not listed here fall through to vop_defaultop.
 */
struct vop_ops hammer_vnode_vops = {
        .vop_default =          vop_defaultop,
        .vop_fsync =            hammer_vop_fsync,
        .vop_getpages =         vop_stdgetpages,
        .vop_putpages =         vop_stdputpages,
        .vop_read =             hammer_vop_read,
        .vop_write =            hammer_vop_write,
        .vop_access =           hammer_vop_access,
        .vop_advlock =          hammer_vop_advlock,
        .vop_close =            hammer_vop_close,
        .vop_ncreate =          hammer_vop_ncreate,
        .vop_getattr =          hammer_vop_getattr,
        .vop_inactive =         hammer_vop_inactive,
        .vop_reclaim =          hammer_vop_reclaim,
        .vop_nresolve =         hammer_vop_nresolve,
        .vop_nlookupdotdot =    hammer_vop_nlookupdotdot,
        .vop_nlink =            hammer_vop_nlink,
        .vop_nmkdir =           hammer_vop_nmkdir,
        .vop_nmknod =           hammer_vop_nmknod,
        .vop_open =             hammer_vop_open,
        .vop_pathconf =         hammer_vop_pathconf,
        .vop_print =            hammer_vop_print,
        .vop_readdir =          hammer_vop_readdir,
        .vop_readlink =         hammer_vop_readlink,
        .vop_nremove =          hammer_vop_nremove,
        .vop_nrename =          hammer_vop_nrename,
        .vop_nrmdir =           hammer_vop_nrmdir,
        .vop_setattr =          hammer_vop_setattr,
        .vop_strategy =         hammer_vop_strategy,
        .vop_nsymlink =         hammer_vop_nsymlink,
        .vop_nwhiteout =        hammer_vop_nwhiteout,
        .vop_ioctl =            hammer_vop_ioctl,
        .vop_mountctl =         hammer_vop_mountctl
};
125
/*
 * Vnode operations vector for character/block special files.  The bulk
 * of the work is delegated to the spec layer (spec_vnoperate); HAMMER
 * only intercepts attribute and lifecycle related operations.
 */
struct vop_ops hammer_spec_vops = {
        .vop_default =          spec_vnoperate,
        .vop_fsync =            hammer_vop_fsync,
        .vop_read =             hammer_vop_specread,
        .vop_write =            hammer_vop_specwrite,
        .vop_access =           hammer_vop_access,
        .vop_close =            hammer_vop_specclose,
        .vop_getattr =          hammer_vop_getattr,
        .vop_inactive =         hammer_vop_inactive,
        .vop_reclaim =          hammer_vop_reclaim,
        .vop_setattr =          hammer_vop_setattr
};
138
/*
 * Vnode operations vector for FIFOs.  The bulk of the work is delegated
 * to the fifofs layer (fifo_vnoperate); HAMMER only intercepts attribute
 * and lifecycle related operations.
 */
struct vop_ops hammer_fifo_vops = {
        .vop_default =          fifo_vnoperate,
        .vop_fsync =            hammer_vop_fsync,
        .vop_read =             hammer_vop_fiforead,
        .vop_write =            hammer_vop_fifowrite,
        .vop_access =           hammer_vop_access,
        .vop_close =            hammer_vop_fifoclose,
        .vop_getattr =          hammer_vop_getattr,
        .vop_inactive =         hammer_vop_inactive,
        .vop_reclaim =          hammer_vop_reclaim,
        .vop_setattr =          hammer_vop_setattr
};
151
152 static int hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
153                            struct vnode *dvp, struct ucred *cred, int flags);
154 static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
155 static int hammer_vop_strategy_write(struct vop_strategy_args *ap);
156
#if 0
/*
 * Generic pass-through: re-dispatch any vnode operation through the
 * regular HAMMER vnode operations vector.  Currently compiled out.
 *
 * Fix: the original declared an unnamed parameter while the body
 * referenced 'ap', so this would not compile if ever re-enabled.
 */
static
int
hammer_vop_vnoperate(struct vop_generic_args *ap)
{
        return (VOCALL(&hammer_vnode_vops, ap));
}
#endif
165
/*
 * hammer_vop_fsync { vp, waitfor }
 *
 * Signal the flusher for the inode, sync the vnode's dirty buffers via
 * vfsync(), and if MNT_WAIT was requested block until the inode's
 * backend flush completes.  Returns any error latched in ip->error.
 */
static
int
hammer_vop_fsync(struct vop_fsync_args *ap)
{
        hammer_inode_t ip = VTOI(ap->a_vp);

        hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
        vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
        if (ap->a_waitfor == MNT_WAIT)
                hammer_wait_inode(ip);
        return (ip->error);
}
181
/*
 * hammer_vop_read { vp, uio, ioflag, cred }
 *
 * Read from a regular file through the buffer cache in HAMMER_BUFSIZE
 * chunks.  On completion the inode's atime is updated in-memory unless
 * the inode is read-only (e.g. an as-of snapshot) or the mount is
 * MNT_NOATIME.
 */
static
int
hammer_vop_read(struct vop_read_args *ap)
{
        struct hammer_transaction trans;
        hammer_inode_t ip;
        off_t offset;           /* byte offset within the current buffer */
        struct buf *bp;
        struct uio *uio;
        int error;
        int n;                  /* bytes to copy from the current buffer */
        int seqcount;

        /* Only regular files are read through this path */
        if (ap->a_vp->v_type != VREG)
                return (EINVAL);
        ip = VTOI(ap->a_vp);
        error = 0;
        seqcount = ap->a_ioflag >> 16;  /* sequential-access hint from ioflag */

        hammer_start_transaction(&trans, ip->hmp);

        /*
         * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
         */
        uio = ap->a_uio;
        while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_rec.ino_size) {
                offset = uio->uio_offset & HAMMER_BUFMASK;
#if 0
                error = cluster_read(ap->a_vp, ip->ino_rec.ino_size,
                                     uio->uio_offset - offset, HAMMER_BUFSIZE,
                                     MAXBSIZE, seqcount, &bp);
#endif
                error = bread(ap->a_vp, uio->uio_offset - offset,
                              HAMMER_BUFSIZE, &bp);
                if (error) {
                        brelse(bp);
                        break;
                }
                /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
                /*
                 * Clamp the copy length to the remainder of the buffer,
                 * the remaining uio request, and the file EOF.
                 */
                n = HAMMER_BUFSIZE - offset;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
                if (n > ip->ino_rec.ino_size - uio->uio_offset)
                        n = (int)(ip->ino_rec.ino_size - uio->uio_offset);
                error = uiomove((char *)bp->b_data + offset, n, uio);
                if (error) {
                        bqrelse(bp);
                        break;
                }
                bqrelse(bp);
        }
        /*
         * Record the access time in the in-memory inode; the flusher
         * writes it to media later (HAMMER_INODE_ITIMES).
         */
        if ((ip->flags & HAMMER_INODE_RO) == 0 &&
            (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
                ip->ino_rec.ino_atime = trans.time;
                hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES);
        }
        hammer_done_transaction(&trans);
        return (error);
}
244
/*
 * hammer_vop_write { vp, uio, ioflag, cred }
 *
 * Write to a regular file through the buffer cache in HAMMER_BUFSIZE
 * chunks.  Handles append mode, offset validation, several buffer
 * instantiation strategies (full overwrite, write past EOF, partial
 * overwrite, and the UIO_NOCOPY case used by vop_stdputpages()), and
 * undoes any VM size extension on error.
 */
static
int
hammer_vop_write(struct vop_write_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_inode *ip;
        struct uio *uio;
        off_t offset;           /* byte offset within the current buffer */
        struct buf *bp;
        int error;
        int n;                  /* bytes to copy into the current buffer */
        int flags;              /* inode modify flags for this iteration */
        int count;              /* buffers written, for bwillwrite pacing */

        if (ap->a_vp->v_type != VREG)
                return (EINVAL);
        ip = VTOI(ap->a_vp);
        error = 0;

        /* Read-only inodes (e.g. as-of snapshots) cannot be written */
        if (ip->flags & HAMMER_INODE_RO)
                return (EROFS);

        /*
         * Create a transaction to cover the operations we perform.
         */
        hammer_start_transaction(&trans, ip->hmp);
        uio = ap->a_uio;

        /*
         * Check append mode
         */
        if (ap->a_ioflag & IO_APPEND)
                uio->uio_offset = ip->ino_rec.ino_size;

        /*
         * Check for illegal write offsets.  Valid range is 0...2^63-1
         */
        if (uio->uio_offset < 0 || uio->uio_offset + uio->uio_resid <= 0) {
                hammer_done_transaction(&trans);
                return (EFBIG);
        }

        /*
         * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
         */
        count = 0;
        while (uio->uio_resid > 0) {
                int fixsize = 0;        /* set when we extended the VM size */

                /*
                 * Do not allow huge writes to deadlock the buffer cache
                 */
                if ((++count & 15) == 0) {
                        /*
                         * Drop the vnode lock around bwillwrite() so the
                         * buffer daemon can make progress, then reacquire.
                         */
                        vn_unlock(ap->a_vp);
                        if ((ap->a_ioflag & IO_NOBWILL) == 0)
                                bwillwrite();
                        vn_lock(ap->a_vp, LK_EXCLUSIVE|LK_RETRY);
                }

                offset = uio->uio_offset & HAMMER_BUFMASK;
                n = HAMMER_BUFSIZE - offset;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
                if (uio->uio_offset + n > ip->ino_rec.ino_size) {
                        /* extend the VM object now, roll back on error */
                        vnode_pager_setsize(ap->a_vp, uio->uio_offset + n);
                        fixsize = 1;
                }

                if (uio->uio_segflg == UIO_NOCOPY) {
                        /*
                         * Issuing a write with the same data backing the
                         * buffer.  Instantiate the buffer to collect the
                         * backing vm pages, then read-in any missing bits.
                         *
                         * This case is used by vop_stdputpages().
                         */
                        bp = getblk(ap->a_vp, uio->uio_offset - offset,
                                    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0) {
                                bqrelse(bp);
                                error = bread(ap->a_vp,
                                              uio->uio_offset - offset,
                                              HAMMER_BUFSIZE, &bp);
                        }
                } else if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
                        /*
                         * Even though we are entirely overwriting the buffer
                         * we may still have to zero it out to avoid a 
                         * mmap/write visibility issue.
                         */
                        bp = getblk(ap->a_vp, uio->uio_offset - offset,
                                    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0)
                                vfs_bio_clrbuf(bp);
                } else if (uio->uio_offset - offset >= ip->ino_rec.ino_size) {
                        /*
                         * If the base offset of the buffer is beyond the
                         * file EOF, we don't have to issue a read.
                         */
                        bp = getblk(ap->a_vp, uio->uio_offset - offset,
                                    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
                        vfs_bio_clrbuf(bp);
                } else {
                        /*
                         * Partial overwrite, read in any missing bits then
                         * replace the portion being written.
                         */
                        error = bread(ap->a_vp, uio->uio_offset - offset,
                                      HAMMER_BUFSIZE, &bp);
                        if (error == 0)
                                bheavy(bp);
                }
                if (error == 0)
                        error = uiomove((char *)bp->b_data + offset, n, uio);

                /*
                 * If we screwed up we have to undo any VM size changes we
                 * made.
                 */
                if (error) {
                        brelse(bp);
                        if (fixsize) {
                                vtruncbuf(ap->a_vp, ip->ino_rec.ino_size,
                                          HAMMER_BUFSIZE);
                        }
                        break;
                }
                /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
                /*
                 * Extend the in-memory file size if the write went past
                 * it, and mark the appropriate inode fields dirty.
                 */
                if (ip->ino_rec.ino_size < uio->uio_offset) {
                        ip->ino_rec.ino_size = uio->uio_offset;
                        flags = HAMMER_INODE_RDIRTY;
                        vnode_pager_setsize(ap->a_vp, ip->ino_rec.ino_size);
                } else {
                        flags = 0;
                }
                ip->ino_rec.ino_mtime = trans.time;
                flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS;
                hammer_modify_inode(&trans, ip, flags);

                /*
                 * Dispose of the buffer: synchronous, async-direct, or
                 * (the common case) delayed write.
                 */
                if (ap->a_ioflag & IO_SYNC) {
                        bwrite(bp);
                } else if (ap->a_ioflag & IO_DIRECT) {
                        bawrite(bp);
#if 0
                } else if ((ap->a_ioflag >> 16) == IO_SEQMAX &&
                           (uio->uio_offset & HAMMER_BUFMASK) == 0) {
                        /*
                         * XXX HAMMER can only fsync the whole inode,
                         * doing it on every buffer would be a bad idea.
                         */
                        /*
                         * If seqcount indicates sequential operation and
                         * we just finished filling a buffer, push it out
                         * now to prevent the buffer cache from becoming
                         * too full, which would trigger non-optimal
                         * flushes.
                         */
                        bdwrite(bp);
#endif
                } else {
                        bdwrite(bp);
                }
        }
        hammer_done_transaction(&trans);
        return (error);
}
414
415 /*
416  * hammer_vop_access { vp, mode, cred }
417  */
418 static
419 int
420 hammer_vop_access(struct vop_access_args *ap)
421 {
422         struct hammer_inode *ip = VTOI(ap->a_vp);
423         uid_t uid;
424         gid_t gid;
425         int error;
426
427         uid = hammer_to_unix_xid(&ip->ino_data.uid);
428         gid = hammer_to_unix_xid(&ip->ino_data.gid);
429
430         error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
431                                   ip->ino_data.uflags);
432         return (error);
433 }
434
435 /*
436  * hammer_vop_advlock { vp, id, op, fl, flags }
437  */
438 static
439 int
440 hammer_vop_advlock(struct vop_advlock_args *ap)
441 {
442         struct hammer_inode *ip = VTOI(ap->a_vp);
443
444         return (lf_advlock(ap, &ip->advlock, ip->ino_rec.ino_size));
445 }
446
/*
 * hammer_vop_close { vp, fflag }
 *
 * Nothing HAMMER-specific to do on close; delegate entirely to the
 * standard close handler.
 */
static
int
hammer_vop_close(struct vop_close_args *ap)
{
        int error;

        error = vop_stdclose(ap);
        return (error);
}
456
/*
 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 *
 * On success *ap->a_vpp holds the new vnode and the namecache entry is
 * resolved to it; on failure *ap->a_vpp is NULL.
 */
static
int
hammer_vop_ncreate(struct vop_ncreate_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_inode *dip;       /* parent directory inode */
        struct hammer_inode *nip;       /* newly created inode */
        struct nchandle *nch;
        int error;

        nch = ap->a_nch;
        dip = VTOI(ap->a_dvp);

        /* cannot create in a read-only (e.g. as-of) directory */
        if (dip->flags & HAMMER_INODE_RO)
                return (EROFS);

        /*
         * Create a transaction to cover the operations we perform.
         */
        hammer_start_transaction(&trans, dip->hmp);

        /*
         * Create a new filesystem object of the requested type.  The
         * returned inode will be referenced and shared-locked to prevent
         * it from being moved to the flusher.
         */

        error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
        if (error) {
                kprintf("hammer_create_inode error %d\n", error);
                hammer_done_transaction(&trans);
                *ap->a_vpp = NULL;
                return (error);
        }

        /*
         * Add the new filesystem object to the directory.  This will also
         * bump the inode's link count.
         */
        error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
        if (error)
                kprintf("hammer_ip_add_directory error %d\n", error);

        /*
         * Finish up.  Drop the inode reference in all cases; on success
         * the vnode returned via hammer_get_vnode() holds its own ref.
         */
        if (error) {
                hammer_rel_inode(nip, 0);
                hammer_done_transaction(&trans);
                *ap->a_vpp = NULL;
        } else {
                error = hammer_get_vnode(nip, ap->a_vpp);
                hammer_done_transaction(&trans);
                hammer_rel_inode(nip, 0);
                if (error == 0) {
                        cache_setunresolved(ap->a_nch);
                        cache_setvp(ap->a_nch, *ap->a_vpp);
                }
        }
        return (error);
}
524
/*
 * hammer_vop_getattr { vp, vap }
 *
 * Fill in *ap->a_vap from the in-memory inode record and data.
 * Always succeeds.
 */
static
int
hammer_vop_getattr(struct vop_getattr_args *ap)
{
        struct hammer_inode *ip = VTOI(ap->a_vp);
        struct vattr *vap = ap->a_vap;

#if 0
        if (cache_check_fsmid_vp(ap->a_vp, &ip->fsmid) &&
            (vp->v_mount->mnt_flag & MNT_RDONLY) == 0 &&
            ip->obj_asof == XXX
        ) {
                /* LAZYMOD XXX */
        }
        hammer_itimes(ap->a_vp);
#endif

        vap->va_fsid = ip->hmp->fsid_udev;
        vap->va_fileid = ip->ino_rec.base.base.obj_id;
        vap->va_mode = ip->ino_data.mode;
        vap->va_nlink = ip->ino_rec.ino_nlinks;
        vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
        vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
        vap->va_rmajor = 0;
        vap->va_rminor = 0;
        vap->va_size = ip->ino_rec.ino_size;
        hammer_to_timespec(ip->ino_rec.ino_atime, &vap->va_atime);
        hammer_to_timespec(ip->ino_rec.ino_mtime, &vap->va_mtime);
        hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
        vap->va_flags = ip->ino_data.uflags;
        vap->va_gen = 1;        /* hammer inums are unique for all time */
        vap->va_blocksize = HAMMER_BUFSIZE;
        /* round the size up to a 64-byte boundary for the byte count */
        vap->va_bytes = (ip->ino_rec.ino_size + 63) & ~63;
        vap->va_type = hammer_get_vnode_type(ip->ino_rec.base.base.obj_type);
        vap->va_filerev = 0;    /* XXX */
        /* mtime uniquely identifies any adjustments made to the file */
        vap->va_fsmid = ip->ino_rec.ino_mtime;
        vap->va_uid_uuid = ip->ino_data.uid;
        vap->va_gid_uuid = ip->ino_data.gid;
        vap->va_fsid_uuid = ip->hmp->fsid;
        vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
                          VA_FSID_UUID_VALID;

        /* device nodes additionally report their major/minor numbers */
        switch (ip->ino_rec.base.base.obj_type) {
        case HAMMER_OBJTYPE_CDEV:
        case HAMMER_OBJTYPE_BDEV:
                vap->va_rmajor = ip->ino_data.rmajor;
                vap->va_rminor = ip->ino_data.rminor;
                break;
        default:
                break;
        }

        return(0);
}
583
/*
 * hammer_vop_nresolve { nch, dvp, cred }
 *
 * Locate the requested directory entry.
 *
 * Supports "name@@<tid>" as-of extensions: the portion after '@@' is
 * parsed as a transaction id and the lookup is performed as-of that
 * time, with the result forced read-only.
 */
static
int
hammer_vop_nresolve(struct vop_nresolve_args *ap)
{
        struct hammer_transaction trans;
        struct namecache *ncp;
        hammer_inode_t dip;             /* directory being searched */
        hammer_inode_t ip;              /* resolved inode */
        hammer_tid_t asof;              /* as-of tid for the lookup */
        struct hammer_cursor cursor;
        union hammer_record_ondisk *rec;
        struct vnode *vp;
        int64_t namekey;
        int error;
        int i;
        int nlen;                       /* name length sans @@ extension */
        int flags;
        u_int64_t obj_id;

        /*
         * Misc initialization, plus handle as-of name extensions.  Look for
         * the '@@' extension.  Note that as-of files and directories cannot
         * be modified.
         */
        dip = VTOI(ap->a_dvp);
        ncp = ap->a_nch->ncp;
        asof = dip->obj_asof;
        nlen = ncp->nc_nlen;
        flags = dip->flags;

        hammer_simple_transaction(&trans, dip->hmp);

        /*
         * NOTE(review): at i == nlen-1 this reads nc_name[nlen]; relies
         * on the namecache name being NUL-terminated — confirm.
         */
        for (i = 0; i < nlen; ++i) {
                if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
                        asof = hammer_str_to_tid(ncp->nc_name + i + 2);
                        flags |= HAMMER_INODE_RO;
                        break;
                }
        }
        nlen = i;       /* truncate the name at the '@@', if any */

        /*
         * If there is no path component the time extension is relative to
         * dip.
         */
        if (nlen == 0) {
                ip = hammer_get_inode(&trans, &dip->cache[1], dip->obj_id,
                                      asof, flags, &error);
                if (error == 0) {
                        error = hammer_get_vnode(ip, &vp);
                        hammer_rel_inode(ip, 0);
                } else {
                        vp = NULL;
                }
                if (error == 0) {
                        vn_unlock(vp);
                        cache_setvp(ap->a_nch, vp);
                        vrele(vp);
                }
                goto done;
        }

        /*
         * Calculate the namekey and setup the key range for the scan.  This
         * works kinda like a chained hash table where the lower 32 bits
         * of the namekey synthesize the chain.
         *
         * The key range is inclusive of both key_beg and key_end.
         */
        namekey = hammer_directory_namekey(ncp->nc_name, nlen);

        error = hammer_init_cursor(&trans, &cursor, &dip->cache[0], dip);
        cursor.key_beg.obj_id = dip->obj_id;
        cursor.key_beg.key = namekey;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
        cursor.key_beg.obj_type = 0;

        cursor.key_end = cursor.key_beg;
        cursor.key_end.key |= 0xFFFFFFFFULL;    /* entire hash chain */
        cursor.asof = asof;
        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

        /*
         * Scan all matching records (the chain), locate the one matching
         * the requested path component.
         *
         * The hammer_ip_*() functions merge in-memory records with on-disk
         * records for the purposes of the search.
         */
        obj_id = 0;

        if (error == 0) {
                rec = NULL;
                error = hammer_ip_first(&cursor);
                while (error == 0) {
                        error = hammer_ip_resolve_data(&cursor);
                        if (error)
                                break;
                        rec = cursor.record;
                        /* exact name match within the hash chain? */
                        if (nlen == rec->entry.base.data_len &&
                            bcmp(ncp->nc_name, cursor.data, nlen) == 0) {
                                obj_id = rec->entry.obj_id;
                                break;
                        }
                        error = hammer_ip_next(&cursor);
                }
        }
        hammer_done_cursor(&cursor);
        if (error == 0) {
                ip = hammer_get_inode(&trans, &dip->cache[1],
                                      obj_id, asof, flags, &error);
                if (error == 0) {
                        error = hammer_get_vnode(ip, &vp);
                        hammer_rel_inode(ip, 0);
                } else {
                        /* directory entry exists but the inode lookup
                         * failed — should not happen */
                        kprintf("nresolve: lookup %s failed dip %p (%016llx) on"
                                " inode %016llx error %d\n",
                                ncp->nc_name,
                                dip, dip->obj_id, obj_id, error);
                        Debugger("x");  /* NOTE(review): debugger leftover */
                        vp = NULL;
                }
                if (error == 0) {
                        vn_unlock(vp);
                        cache_setvp(ap->a_nch, vp);
                        vrele(vp);
                }
        } else if (error == ENOENT) {
                /* negative-cache the miss */
                cache_setvp(ap->a_nch, NULL);
        }
done:
        hammer_done_transaction(&trans);
        return (error);
}
725
/*
 * hammer_vop_nlookupdotdot { dvp, vpp, cred }
 *
 * Locate the parent directory of a directory vnode.
 *
 * dvp is referenced but not locked.  *vpp must be returned referenced and
 * locked.  A parent_obj_id of 0 does not necessarily indicate that we are
 * at the root, instead it could indicate that the directory we were in was
 * removed.
 *
 * NOTE: as-of sequences are not linked into the directory structure.  If
 * we are at the root with a different asof then the mount point, reload
 * the same directory with the mount point's asof.   I'm not sure what this
 * will do to NFS.  We encode ASOF stamps in NFS file handles so it might not
 * get confused, but it hasn't been tested.
 */
static
int
hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_inode *dip;
        struct hammer_inode *ip;
        int64_t parent_obj_id;
        hammer_tid_t asof;
        int error;

        dip = VTOI(ap->a_dvp);
        asof = dip->obj_asof;
        parent_obj_id = dip->ino_data.parent_obj_id;

        if (parent_obj_id == 0) {
                if (dip->obj_id == HAMMER_OBJID_ROOT &&
                   asof != dip->hmp->asof) {
                        /*
                         * Root of an as-of view: the "parent" is the same
                         * root directory at the mount point's asof.  The
                         * fake name is the as-of stamp: "0x" + 16 hex
                         * digits + NUL = 19 bytes.
                         */
                        parent_obj_id = dip->obj_id;
                        asof = dip->hmp->asof;
                        *ap->a_fakename = kmalloc(19, M_TEMP, M_WAITOK);
                        ksnprintf(*ap->a_fakename, 19, "0x%016llx",
                                   dip->obj_asof);
                } else {
                        /* at the true root, or our directory was removed */
                        *ap->a_vpp = NULL;
                        return ENOENT;
                }
        }

        hammer_simple_transaction(&trans, dip->hmp);

        ip = hammer_get_inode(&trans, &dip->cache[1], parent_obj_id,
                              asof, dip->flags, &error);
        if (ip) {
                error = hammer_get_vnode(ip, ap->a_vpp);
                hammer_rel_inode(ip, 0);
        } else {
                *ap->a_vpp = NULL;
        }
        hammer_done_transaction(&trans);
        return (error);
}
784
/*
 * hammer_vop_nlink { nch, dvp, vp, cred }
 *
 * Create a hard link to the file backing vp under the name held in
 * nch, inside directory dvp.  Fails with EROFS if either the target
 * inode or the directory is read-only (e.g. an as-of snapshot).
 */
static
int
hammer_vop_nlink(struct vop_nlink_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_inode *dip;       /* directory receiving the link */
        struct hammer_inode *ip;        /* existing file being linked */
        struct nchandle *nch;
        int error;

        nch = ap->a_nch;
        dip = VTOI(ap->a_dvp);
        ip = VTOI(ap->a_vp);

        if (dip->flags & HAMMER_INODE_RO)
                return (EROFS);
        if (ip->flags & HAMMER_INODE_RO)
                return (EROFS);

        /*
         * Create a transaction to cover the operations we perform.
         */
        hammer_start_transaction(&trans, dip->hmp);

        /*
         * Add the filesystem object to the directory.  Note that neither
         * dip nor ip are referenced or locked, but their vnodes are
         * referenced.  This function will bump the inode's link count.
         */
        error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip);

        /*
         * Finish up.  On success resolve the namecache entry to vp.
         */
        if (error == 0) {
                cache_setunresolved(nch);
                cache_setvp(nch, ap->a_vp);
        }
        hammer_done_transaction(&trans);
        return (error);
}
829
830 /*
831  * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
832  *
833  * The operating system has already ensured that the directory entry
834  * does not exist and done all appropriate namespace locking.
835  */
836 static
837 int
838 hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
839 {
840         struct hammer_transaction trans;
841         struct hammer_inode *dip;
842         struct hammer_inode *nip;
843         struct nchandle *nch;
844         int error;
845
846         nch = ap->a_nch;
847         dip = VTOI(ap->a_dvp);
848
849         if (dip->flags & HAMMER_INODE_RO)
850                 return (EROFS);
851
852         /*
853          * Create a transaction to cover the operations we perform.
854          */
855         hammer_start_transaction(&trans, dip->hmp);
856
857         /*
858          * Create a new filesystem object of the requested type.  The
859          * returned inode will be referenced but not locked.
860          */
861         error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
862         if (error) {
863                 kprintf("hammer_mkdir error %d\n", error);
864                 hammer_done_transaction(&trans);
865                 *ap->a_vpp = NULL;
866                 return (error);
867         }
868         /*
869          * Add the new filesystem object to the directory.  This will also
870          * bump the inode's link count.
871          */
872         error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
873         if (error)
874                 kprintf("hammer_mkdir (add) error %d\n", error);
875
876         /*
877          * Finish up.
878          */
879         if (error) {
880                 hammer_rel_inode(nip, 0);
881                 *ap->a_vpp = NULL;
882         } else {
883                 error = hammer_get_vnode(nip, ap->a_vpp);
884                 hammer_rel_inode(nip, 0);
885                 if (error == 0) {
886                         cache_setunresolved(ap->a_nch);
887                         cache_setvp(ap->a_nch, *ap->a_vpp);
888                 }
889         }
890         hammer_done_transaction(&trans);
891         return (error);
892 }
893
894 /*
895  * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
896  *
897  * The operating system has already ensured that the directory entry
898  * does not exist and done all appropriate namespace locking.
899  */
900 static
901 int
902 hammer_vop_nmknod(struct vop_nmknod_args *ap)
903 {
904         struct hammer_transaction trans;
905         struct hammer_inode *dip;
906         struct hammer_inode *nip;
907         struct nchandle *nch;
908         int error;
909
910         nch = ap->a_nch;
911         dip = VTOI(ap->a_dvp);
912
913         if (dip->flags & HAMMER_INODE_RO)
914                 return (EROFS);
915
916         /*
917          * Create a transaction to cover the operations we perform.
918          */
919         hammer_start_transaction(&trans, dip->hmp);
920
921         /*
922          * Create a new filesystem object of the requested type.  The
923          * returned inode will be referenced but not locked.
924          */
925         error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
926         if (error) {
927                 hammer_done_transaction(&trans);
928                 *ap->a_vpp = NULL;
929                 return (error);
930         }
931
932         /*
933          * Add the new filesystem object to the directory.  This will also
934          * bump the inode's link count.
935          */
936         error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
937
938         /*
939          * Finish up.
940          */
941         if (error) {
942                 hammer_rel_inode(nip, 0);
943                 *ap->a_vpp = NULL;
944         } else {
945                 error = hammer_get_vnode(nip, ap->a_vpp);
946                 hammer_rel_inode(nip, 0);
947                 if (error == 0) {
948                         cache_setunresolved(ap->a_nch);
949                         cache_setvp(ap->a_nch, *ap->a_vpp);
950                 }
951         }
952         hammer_done_transaction(&trans);
953         return (error);
954 }
955
956 /*
957  * hammer_vop_open { vp, mode, cred, fp }
958  */
959 static
960 int
961 hammer_vop_open(struct vop_open_args *ap)
962 {
963         if ((ap->a_mode & FWRITE) && (VTOI(ap->a_vp)->flags & HAMMER_INODE_RO))
964                 return (EROFS);
965
966         return(vop_stdopen(ap));
967 }
968
969 /*
970  * hammer_vop_pathconf { vp, name, retval }
971  */
972 static
973 int
974 hammer_vop_pathconf(struct vop_pathconf_args *ap)
975 {
976         return EOPNOTSUPP;
977 }
978
979 /*
980  * hammer_vop_print { vp }
981  */
982 static
983 int
984 hammer_vop_print(struct vop_print_args *ap)
985 {
986         return EOPNOTSUPP;
987 }
988
989 /*
990  * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
991  */
992 static
993 int
994 hammer_vop_readdir(struct vop_readdir_args *ap)
995 {
996         struct hammer_transaction trans;
997         struct hammer_cursor cursor;
998         struct hammer_inode *ip;
999         struct uio *uio;
1000         hammer_record_ondisk_t rec;
1001         hammer_base_elm_t base;
1002         int error;
1003         int cookie_index;
1004         int ncookies;
1005         off_t *cookies;
1006         off_t saveoff;
1007         int r;
1008
1009         ip = VTOI(ap->a_vp);
1010         uio = ap->a_uio;
1011         saveoff = uio->uio_offset;
1012
1013         if (ap->a_ncookies) {
1014                 ncookies = uio->uio_resid / 16 + 1;
1015                 if (ncookies > 1024)
1016                         ncookies = 1024;
1017                 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
1018                 cookie_index = 0;
1019         } else {
1020                 ncookies = -1;
1021                 cookies = NULL;
1022                 cookie_index = 0;
1023         }
1024
1025         hammer_simple_transaction(&trans, ip->hmp);
1026
1027         /*
1028          * Handle artificial entries
1029          */
1030         error = 0;
1031         if (saveoff == 0) {
1032                 r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, ".");
1033                 if (r)
1034                         goto done;
1035                 if (cookies)
1036                         cookies[cookie_index] = saveoff;
1037                 ++saveoff;
1038                 ++cookie_index;
1039                 if (cookie_index == ncookies)
1040                         goto done;
1041         }
1042         if (saveoff == 1) {
1043                 if (ip->ino_data.parent_obj_id) {
1044                         r = vop_write_dirent(&error, uio,
1045                                              ip->ino_data.parent_obj_id,
1046                                              DT_DIR, 2, "..");
1047                 } else {
1048                         r = vop_write_dirent(&error, uio,
1049                                              ip->obj_id, DT_DIR, 2, "..");
1050                 }
1051                 if (r)
1052                         goto done;
1053                 if (cookies)
1054                         cookies[cookie_index] = saveoff;
1055                 ++saveoff;
1056                 ++cookie_index;
1057                 if (cookie_index == ncookies)
1058                         goto done;
1059         }
1060
1061         /*
1062          * Key range (begin and end inclusive) to scan.  Directory keys
1063          * directly translate to a 64 bit 'seek' position.
1064          */
1065         hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
1066         cursor.key_beg.obj_id = ip->obj_id;
1067         cursor.key_beg.create_tid = 0;
1068         cursor.key_beg.delete_tid = 0;
1069         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1070         cursor.key_beg.obj_type = 0;
1071         cursor.key_beg.key = saveoff;
1072
1073         cursor.key_end = cursor.key_beg;
1074         cursor.key_end.key = HAMMER_MAX_KEY;
1075         cursor.asof = ip->obj_asof;
1076         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
1077
1078         error = hammer_ip_first(&cursor);
1079
1080         while (error == 0) {
1081                 error = hammer_ip_resolve_record_and_data(&cursor);
1082                 if (error)
1083                         break;
1084                 rec = cursor.record;
1085                 base = &rec->base.base;
1086                 saveoff = base->key;
1087
1088                 if (base->obj_id != ip->obj_id)
1089                         panic("readdir: bad record at %p", cursor.node);
1090
1091                 r = vop_write_dirent(
1092                              &error, uio, rec->entry.obj_id,
1093                              hammer_get_dtype(rec->entry.base.base.obj_type),
1094                              rec->entry.base.data_len,
1095                              (void *)cursor.data);
1096                 if (r)
1097                         break;
1098                 ++saveoff;
1099                 if (cookies)
1100                         cookies[cookie_index] = base->key;
1101                 ++cookie_index;
1102                 if (cookie_index == ncookies)
1103                         break;
1104                 error = hammer_ip_next(&cursor);
1105         }
1106         hammer_done_cursor(&cursor);
1107
1108 done:
1109         hammer_done_transaction(&trans);
1110
1111         if (ap->a_eofflag)
1112                 *ap->a_eofflag = (error == ENOENT);
1113         uio->uio_offset = saveoff;
1114         if (error && cookie_index == 0) {
1115                 if (error == ENOENT)
1116                         error = 0;
1117                 if (cookies) {
1118                         kfree(cookies, M_TEMP);
1119                         *ap->a_ncookies = 0;
1120                         *ap->a_cookies = NULL;
1121                 }
1122         } else {
1123                 if (error == ENOENT)
1124                         error = 0;
1125                 if (cookies) {
1126                         *ap->a_ncookies = cookie_index;
1127                         *ap->a_cookies = cookies;
1128                 }
1129         }
1130         return(error);
1131 }
1132
1133 /*
1134  * hammer_vop_readlink { vp, uio, cred }
1135  */
1136 static
1137 int
1138 hammer_vop_readlink(struct vop_readlink_args *ap)
1139 {
1140         struct hammer_transaction trans;
1141         struct hammer_cursor cursor;
1142         struct hammer_inode *ip;
1143         int error;
1144
1145         ip = VTOI(ap->a_vp);
1146
1147         hammer_simple_transaction(&trans, ip->hmp);
1148
1149         hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
1150
1151         /*
1152          * Key range (begin and end inclusive) to scan.  Directory keys
1153          * directly translate to a 64 bit 'seek' position.
1154          */
1155         cursor.key_beg.obj_id = ip->obj_id;
1156         cursor.key_beg.create_tid = 0;
1157         cursor.key_beg.delete_tid = 0;
1158         cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX;
1159         cursor.key_beg.obj_type = 0;
1160         cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK;
1161         cursor.asof = ip->obj_asof;
1162         cursor.flags |= HAMMER_CURSOR_ASOF;
1163
1164         error = hammer_ip_lookup(&cursor, ip);
1165         if (error == 0) {
1166                 error = hammer_ip_resolve_data(&cursor);
1167                 if (error == 0) {
1168                         error = uiomove((char *)cursor.data,
1169                                         cursor.record->base.data_len,
1170                                         ap->a_uio);
1171                 }
1172         }
1173         hammer_done_cursor(&cursor);
1174         hammer_done_transaction(&trans);
1175         return(error);
1176 }
1177
1178 /*
1179  * hammer_vop_nremove { nch, dvp, cred }
1180  */
1181 static
1182 int
1183 hammer_vop_nremove(struct vop_nremove_args *ap)
1184 {
1185         struct hammer_transaction trans;
1186         int error;
1187
1188         hammer_start_transaction(&trans, VTOI(ap->a_dvp)->hmp);
1189         error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
1190         hammer_done_transaction(&trans);
1191
1192         return (error);
1193 }
1194
1195 /*
1196  * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1197  */
1198 static
1199 int
1200 hammer_vop_nrename(struct vop_nrename_args *ap)
1201 {
1202         struct hammer_transaction trans;
1203         struct namecache *fncp;
1204         struct namecache *tncp;
1205         struct hammer_inode *fdip;
1206         struct hammer_inode *tdip;
1207         struct hammer_inode *ip;
1208         struct hammer_cursor cursor;
1209         union hammer_record_ondisk *rec;
1210         int64_t namekey;
1211         int error;
1212
1213         fdip = VTOI(ap->a_fdvp);
1214         tdip = VTOI(ap->a_tdvp);
1215         fncp = ap->a_fnch->ncp;
1216         tncp = ap->a_tnch->ncp;
1217         ip = VTOI(fncp->nc_vp);
1218         KKASSERT(ip != NULL);
1219
1220         if (fdip->flags & HAMMER_INODE_RO)
1221                 return (EROFS);
1222         if (tdip->flags & HAMMER_INODE_RO)
1223                 return (EROFS);
1224         if (ip->flags & HAMMER_INODE_RO)
1225                 return (EROFS);
1226
1227         hammer_start_transaction(&trans, fdip->hmp);
1228
1229         /*
1230          * Remove tncp from the target directory and then link ip as
1231          * tncp. XXX pass trans to dounlink
1232          *
1233          * Force the inode sync-time to match the transaction so it is
1234          * in-sync with the creation of the target directory entry.
1235          */
1236         error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
1237         if (error == 0 || error == ENOENT) {
1238                 error = hammer_ip_add_directory(&trans, tdip, tncp, ip);
1239                 if (error == 0) {
1240                         ip->ino_data.parent_obj_id = tdip->obj_id;
1241                         hammer_modify_inode(&trans, ip, HAMMER_INODE_DDIRTY);
1242                 }
1243         }
1244         if (error)
1245                 goto failed; /* XXX */
1246
1247         /*
1248          * Locate the record in the originating directory and remove it.
1249          *
1250          * Calculate the namekey and setup the key range for the scan.  This
1251          * works kinda like a chained hash table where the lower 32 bits
1252          * of the namekey synthesize the chain.
1253          *
1254          * The key range is inclusive of both key_beg and key_end.
1255          */
1256         namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
1257 retry:
1258         hammer_init_cursor(&trans, &cursor, &fdip->cache[0], fdip);
1259         cursor.key_beg.obj_id = fdip->obj_id;
1260         cursor.key_beg.key = namekey;
1261         cursor.key_beg.create_tid = 0;
1262         cursor.key_beg.delete_tid = 0;
1263         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1264         cursor.key_beg.obj_type = 0;
1265
1266         cursor.key_end = cursor.key_beg;
1267         cursor.key_end.key |= 0xFFFFFFFFULL;
1268         cursor.asof = fdip->obj_asof;
1269         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
1270
1271         /*
1272          * Scan all matching records (the chain), locate the one matching
1273          * the requested path component.
1274          *
1275          * The hammer_ip_*() functions merge in-memory records with on-disk
1276          * records for the purposes of the search.
1277          */
1278         error = hammer_ip_first(&cursor);
1279         while (error == 0) {
1280                 if (hammer_ip_resolve_data(&cursor) != 0)
1281                         break;
1282                 rec = cursor.record;
1283                 if (fncp->nc_nlen == rec->entry.base.data_len &&
1284                     bcmp(fncp->nc_name, cursor.data, fncp->nc_nlen) == 0) {
1285                         break;
1286                 }
1287                 error = hammer_ip_next(&cursor);
1288         }
1289
1290         /*
1291          * If all is ok we have to get the inode so we can adjust nlinks.
1292          *
1293          * WARNING: hammer_ip_del_directory() may have to terminate the
1294          * cursor to avoid a recursion.  It's ok to call hammer_done_cursor()
1295          * twice.
1296          */
1297         if (error == 0)
1298                 error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);
1299
1300         /*
1301          * XXX A deadlock here will break rename's atomicy for the purposes
1302          * of crash recovery.
1303          */
1304         if (error == EDEADLK) {
1305                 hammer_done_cursor(&cursor);
1306                 goto retry;
1307         }
1308
1309         /*
1310          * Cleanup and tell the kernel that the rename succeeded.
1311          */
1312         hammer_done_cursor(&cursor);
1313         if (error == 0)
1314                 cache_rename(ap->a_fnch, ap->a_tnch);
1315
1316 failed:
1317         hammer_done_transaction(&trans);
1318         return (error);
1319 }
1320
1321 /*
1322  * hammer_vop_nrmdir { nch, dvp, cred }
1323  */
1324 static
1325 int
1326 hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
1327 {
1328         struct hammer_transaction trans;
1329         int error;
1330
1331         hammer_start_transaction(&trans, VTOI(ap->a_dvp)->hmp);
1332         error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
1333         hammer_done_transaction(&trans);
1334
1335         return (error);
1336 }
1337
1338 /*
1339  * hammer_vop_setattr { vp, vap, cred }
1340  */
1341 static
1342 int
1343 hammer_vop_setattr(struct vop_setattr_args *ap)
1344 {
1345         struct hammer_transaction trans;
1346         struct vattr *vap;
1347         struct hammer_inode *ip;
1348         int modflags;
1349         int error;
1350         int truncating;
1351         off_t aligned_size;
1352         u_int32_t flags;
1353         uuid_t uuid;
1354
1355         vap = ap->a_vap;
1356         ip = ap->a_vp->v_data;
1357         modflags = 0;
1358
1359         if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
1360                 return(EROFS);
1361         if (ip->flags & HAMMER_INODE_RO)
1362                 return (EROFS);
1363
1364         hammer_start_transaction(&trans, ip->hmp);
1365         error = 0;
1366
1367         if (vap->va_flags != VNOVAL) {
1368                 flags = ip->ino_data.uflags;
1369                 error = vop_helper_setattr_flags(&flags, vap->va_flags,
1370                                          hammer_to_unix_xid(&ip->ino_data.uid),
1371                                          ap->a_cred);
1372                 if (error == 0) {
1373                         if (ip->ino_data.uflags != flags) {
1374                                 ip->ino_data.uflags = flags;
1375                                 modflags |= HAMMER_INODE_DDIRTY;
1376                         }
1377                         if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
1378                                 error = 0;
1379                                 goto done;
1380                         }
1381                 }
1382                 goto done;
1383         }
1384         if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
1385                 error = EPERM;
1386                 goto done;
1387         }
1388         if (vap->va_uid != (uid_t)VNOVAL) {
1389                 hammer_guid_to_uuid(&uuid, vap->va_uid);
1390                 if (bcmp(&uuid, &ip->ino_data.uid, sizeof(uuid)) != 0) {
1391                         ip->ino_data.uid = uuid;
1392                         modflags |= HAMMER_INODE_DDIRTY;
1393                 }
1394         }
1395         if (vap->va_gid != (uid_t)VNOVAL) {
1396                 hammer_guid_to_uuid(&uuid, vap->va_gid);
1397                 if (bcmp(&uuid, &ip->ino_data.gid, sizeof(uuid)) != 0) {
1398                         ip->ino_data.gid = uuid;
1399                         modflags |= HAMMER_INODE_DDIRTY;
1400                 }
1401         }
1402         while (vap->va_size != VNOVAL && ip->ino_rec.ino_size != vap->va_size) {
1403                 switch(ap->a_vp->v_type) {
1404                 case VREG:
1405                         if (vap->va_size == ip->ino_rec.ino_size)
1406                                 break;
1407                         /*
1408                          * XXX break atomicy, we can deadlock the backend
1409                          * if we do not release the lock.  Probably not a
1410                          * big deal here.
1411                          */
1412                         if (vap->va_size < ip->ino_rec.ino_size) {
1413                                 vtruncbuf(ap->a_vp, vap->va_size,
1414                                           HAMMER_BUFSIZE);
1415                                 truncating = 1;
1416                         } else {
1417                                 vnode_pager_setsize(ap->a_vp, vap->va_size);
1418                                 truncating = 0;
1419                         }
1420                         ip->ino_rec.ino_size = vap->va_size;
1421                         modflags |= HAMMER_INODE_RDIRTY;
1422                         aligned_size = (vap->va_size + HAMMER_BUFMASK) &
1423                                        ~HAMMER_BUFMASK64;
1424
1425                         /*
1426                          * on-media truncation is cached in the inode until
1427                          * the inode is synchronized.
1428                          */
1429                         if (truncating) {
1430                                 if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
1431                                         ip->flags |= HAMMER_INODE_TRUNCATED;
1432                                         ip->trunc_off = vap->va_size;
1433                                 } else if (ip->trunc_off > vap->va_size) {
1434                                         ip->trunc_off = vap->va_size;
1435                                 }
1436                         }
1437
1438                         /*
1439                          * If truncating we have to clean out a portion of
1440                          * the last block on-disk.  We do this in the
1441                          * front-end buffer cache.
1442                          */
1443                         if (truncating && vap->va_size < aligned_size) {
1444                                 struct buf *bp;
1445                                 int offset;
1446
1447                                 offset = vap->va_size & HAMMER_BUFMASK;
1448                                 error = bread(ap->a_vp,
1449                                               aligned_size - HAMMER_BUFSIZE,
1450                                               HAMMER_BUFSIZE, &bp);
1451                                 if (error == 0) {
1452                                         bzero(bp->b_data + offset,
1453                                               HAMMER_BUFSIZE - offset);
1454                                         bdwrite(bp);
1455                                 } else {
1456                                         brelse(bp);
1457                                 }
1458                         }
1459                         break;
1460                 case VDATABASE:
1461                         if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
1462                                 ip->flags |= HAMMER_INODE_TRUNCATED;
1463                                 ip->trunc_off = vap->va_size;
1464                         } else if (ip->trunc_off > vap->va_size) {
1465                                 ip->trunc_off = vap->va_size;
1466                         }
1467                         ip->ino_rec.ino_size = vap->va_size;
1468                         modflags |= HAMMER_INODE_RDIRTY;
1469                         break;
1470                 default:
1471                         error = EINVAL;
1472                         goto done;
1473                 }
1474                 break;
1475         }
1476         if (vap->va_atime.tv_sec != VNOVAL) {
1477                 ip->ino_rec.ino_atime =
1478                         hammer_timespec_to_transid(&vap->va_atime);
1479                 modflags |= HAMMER_INODE_ITIMES;
1480         }
1481         if (vap->va_mtime.tv_sec != VNOVAL) {
1482                 ip->ino_rec.ino_mtime =
1483                         hammer_timespec_to_transid(&vap->va_mtime);
1484                 modflags |= HAMMER_INODE_ITIMES;
1485         }
1486         if (vap->va_mode != (mode_t)VNOVAL) {
1487                 if (ip->ino_data.mode != vap->va_mode) {
1488                         ip->ino_data.mode = vap->va_mode;
1489                         modflags |= HAMMER_INODE_DDIRTY;
1490                 }
1491         }
1492 done:
1493         if (error == 0)
1494                 hammer_modify_inode(&trans, ip, modflags);
1495         hammer_done_transaction(&trans);
1496         return (error);
1497 }
1498
1499 /*
1500  * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1501  */
1502 static
1503 int
1504 hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
1505 {
1506         struct hammer_transaction trans;
1507         struct hammer_inode *dip;
1508         struct hammer_inode *nip;
1509         struct nchandle *nch;
1510         hammer_record_t record;
1511         int error;
1512         int bytes;
1513
1514         ap->a_vap->va_type = VLNK;
1515
1516         nch = ap->a_nch;
1517         dip = VTOI(ap->a_dvp);
1518
1519         if (dip->flags & HAMMER_INODE_RO)
1520                 return (EROFS);
1521
1522         /*
1523          * Create a transaction to cover the operations we perform.
1524          */
1525         hammer_start_transaction(&trans, dip->hmp);
1526
1527         /*
1528          * Create a new filesystem object of the requested type.  The
1529          * returned inode will be referenced but not locked.
1530          */
1531
1532         error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
1533         if (error) {
1534                 hammer_done_transaction(&trans);
1535                 *ap->a_vpp = NULL;
1536                 return (error);
1537         }
1538
1539         /*
1540          * Add a record representing the symlink.  symlink stores the link
1541          * as pure data, not a string, and is no \0 terminated.
1542          */
1543         if (error == 0) {
1544                 record = hammer_alloc_mem_record(nip);
1545                 record->type = HAMMER_MEM_RECORD_GENERAL;
1546                 bytes = strlen(ap->a_target);
1547
1548                 record->rec.base.base.key = HAMMER_FIXKEY_SYMLINK;
1549                 record->rec.base.base.rec_type = HAMMER_RECTYPE_FIX;
1550                 record->rec.base.data_len = bytes;
1551                 record->data = (void *)ap->a_target;
1552                 /* will be reallocated by routine below */
1553                 error = hammer_ip_add_record(&trans, record);
1554
1555                 /*
1556                  * Set the file size to the length of the link.
1557                  */
1558                 if (error == 0) {
1559                         nip->ino_rec.ino_size = bytes;
1560                         hammer_modify_inode(&trans, nip, HAMMER_INODE_RDIRTY);
1561                 }
1562         }
1563         if (error == 0)
1564                 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
1565
1566         /*
1567          * Finish up.
1568          */
1569         if (error) {
1570                 hammer_rel_inode(nip, 0);
1571                 *ap->a_vpp = NULL;
1572         } else {
1573                 error = hammer_get_vnode(nip, ap->a_vpp);
1574                 hammer_rel_inode(nip, 0);
1575                 if (error == 0) {
1576                         cache_setunresolved(ap->a_nch);
1577                         cache_setvp(ap->a_nch, *ap->a_vpp);
1578                 }
1579         }
1580         hammer_done_transaction(&trans);
1581         return (error);
1582 }
1583
1584 /*
1585  * hammer_vop_nwhiteout { nch, dvp, cred, flags }
1586  */
1587 static
1588 int
1589 hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
1590 {
1591         struct hammer_transaction trans;
1592         int error;
1593
1594         hammer_start_transaction(&trans, VTOI(ap->a_dvp)->hmp);
1595         error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp,
1596                                 ap->a_cred, ap->a_flags);
1597         hammer_done_transaction(&trans);
1598
1599         return (error);
1600 }
1601
1602 /*
1603  * hammer_vop_ioctl { vp, command, data, fflag, cred }
1604  */
1605 static
1606 int
1607 hammer_vop_ioctl(struct vop_ioctl_args *ap)
1608 {
1609         struct hammer_inode *ip = ap->a_vp->v_data;
1610
1611         return(hammer_ioctl(ip, ap->a_command, ap->a_data,
1612                             ap->a_fflag, ap->a_cred));
1613 }
1614
1615 static
1616 int
1617 hammer_vop_mountctl(struct vop_mountctl_args *ap)
1618 {
1619         struct mount *mp;
1620         int error;
1621
1622         mp = ap->a_head.a_ops->head.vv_mount;
1623
1624         switch(ap->a_op) {
1625         case MOUNTCTL_SET_EXPORT:
1626                 if (ap->a_ctllen != sizeof(struct export_args))
1627                         error = EINVAL;
1628                 error = hammer_vfs_export(mp, ap->a_op,
1629                                       (const struct export_args *)ap->a_ctl);
1630                 break;
1631         default:
1632                 error = journal_mountctl(ap);
1633                 break;
1634         }
1635         return(error);
1636 }
1637
1638 /*
1639  * hammer_vop_strategy { vp, bio }
1640  *
1641  * Strategy call, used for regular file read & write only.  Note that the
1642  * bp may represent a cluster.
1643  *
1644  * To simplify operation and allow better optimizations in the future,
1645  * this code does not make any assumptions with regards to buffer alignment
1646  * or size.
1647  */
1648 static
1649 int
1650 hammer_vop_strategy(struct vop_strategy_args *ap)
1651 {
1652         struct buf *bp;
1653         int error;
1654
1655         bp = ap->a_bio->bio_buf;
1656
1657         switch(bp->b_cmd) {
1658         case BUF_CMD_READ:
1659                 error = hammer_vop_strategy_read(ap);
1660                 break;
1661         case BUF_CMD_WRITE:
1662                 error = hammer_vop_strategy_write(ap);
1663                 break;
1664         default:
1665                 bp->b_error = error = EINVAL;
1666                 bp->b_flags |= B_ERROR;
1667                 biodone(ap->a_bio);
1668                 break;
1669         }
1670         return (error);
1671 }
1672
1673 /*
1674  * Read from a regular file.  Iterate the related records and fill in the
1675  * BIO/BUF.  Gaps are zero-filled.
1676  *
1677  * The support code in hammer_object.c should be used to deal with mixed
1678  * in-memory and on-disk records.
1679  *
1680  * XXX atime update
1681  */
static
int
hammer_vop_strategy_read(struct vop_strategy_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_inode *ip;
        struct hammer_cursor cursor;
        hammer_record_ondisk_t rec;
        hammer_base_elm_t base;
        struct bio *bio;
        struct buf *bp;
        int64_t rec_offset;     /* file offset of the current record's data */
        int64_t ran_end;        /* inclusive end of the requested byte range */
        int64_t tmp64;
        int error;
        int boff;               /* current fill offset within bp->b_data */
        int roff;               /* offset into the current record's data */
        int n;                  /* byte count for the current zero/copy op */

        bio = ap->a_bio;
        bp = bio->bio_buf;
        ip = ap->a_vp->v_data;

        /*
         * Set up a transaction and a cursor keyed to this inode.  The
         * hammer_ip_*() iteration below merges in-memory and on-disk
         * records.
         */
        hammer_simple_transaction(&trans, ip->hmp);
        hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);

        /*
         * Key range (begin and end inclusive) to scan.  Note that the key's
         * stored in the actual records represent BASE+LEN, not BASE.  The
         * first record containing bio_offset will have a key > bio_offset.
         */
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.obj_type = 0;
        cursor.key_beg.key = bio->bio_offset + 1;
        cursor.asof = ip->obj_asof;
        cursor.flags |= HAMMER_CURSOR_ASOF | HAMMER_CURSOR_DATAEXTOK;

        cursor.key_end = cursor.key_beg;
        /* only regular files are expected here */
        KKASSERT(ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_REGFILE);
#if 0
        if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
                cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
                cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
                cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
        } else
#endif
        {
                ran_end = bio->bio_offset + bp->b_bufsize;
                cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
                cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
                tmp64 = ran_end + MAXPHYS + 1;  /* work-around GCC-4 bug */
                /* guard against signed overflow when extending the range */
                if (tmp64 < ran_end)
                        cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
                else
                        cursor.key_end.key = ran_end + MAXPHYS + 1;
        }
        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

        error = hammer_ip_first(&cursor);
        boff = 0;

        /*
         * Iterate the records overlapping the buffer, zero-filling any
         * gap before each record and copying the overlapping portion of
         * the record's data, until the buffer is full or the records
         * are exhausted.
         */
        while (error == 0) {
                error = hammer_ip_resolve_data(&cursor);
                if (error)
                        break;
                rec = cursor.record;
                base = &rec->base.base;

                /* record keys store BASE+LEN; recover the base offset */
                rec_offset = base->key - rec->data.base.data_len;

                /*
                 * Calculate the gap, if any, and zero-fill it.
                 */
                n = (int)(rec_offset - (bio->bio_offset + boff));
                if (n > 0) {
                        if (n > bp->b_bufsize - boff)
                                n = bp->b_bufsize - boff;
                        bzero((char *)bp->b_data + boff, n);
                        boff += n;
                        n = 0;
                }

                /*
                 * Calculate the data offset in the record and the number
                 * of bytes we can copy.
                 *
                 * n <= 0 here, so roff = -n is how far the current buffer
                 * position lies past the start of the record's data.
                 *
                 * Note there is a degenerate case here where boff may
                 * already be at bp->b_bufsize.
                 */
                roff = -n;
                rec_offset += roff;
                n = rec->data.base.data_len - roff;
                KKASSERT(n > 0);
                if (n > bp->b_bufsize - boff)
                        n = bp->b_bufsize - boff;

                /*
                 * If we cached a truncation point on our front-end the
                 * on-disk version may still have physical records beyond
                 * that point.  Truncate visibility.
                 */
                if (ip->trunc_off <= rec_offset)
                        n = 0;
                else if (ip->trunc_off < rec_offset + n)
                        n = (int)(ip->trunc_off - rec_offset);

                /*
                 * Copy
                 */
                if (n) {
                        bcopy((char *)cursor.data + roff,
                              (char *)bp->b_data + boff, n);
                        boff += n;
                }
                if (boff == bp->b_bufsize)
                        break;
                error = hammer_ip_next(&cursor);
        }
        hammer_done_cursor(&cursor);
        hammer_done_transaction(&trans);

        /*
         * There may have been a gap after the last record; zero-fill the
         * remainder of the buffer.  ENOENT just means the iteration ran
         * out of records and is not an error.
         */
        if (error == ENOENT)
                error = 0;
        if (error == 0 && boff != bp->b_bufsize) {
                KKASSERT(boff < bp->b_bufsize);
                bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
                /* boff = bp->b_bufsize; */
        }
        bp->b_resid = 0;
        bp->b_error = error;
        if (error)
                bp->b_flags |= B_ERROR;
        biodone(ap->a_bio);
        return(error);
}
1822
1823 /*
1824  * Write to a regular file.   Because this is a strategy call the OS is
1825  * trying to actually sync data to the media.   HAMMER can only flush
1826  * the entire inode (so the TID remains properly synchronized).
1827  *
1828  * Basically all we do here is place the bio on the inode's flush queue
1829  * and activate the flusher.
1830  */
1831 static
1832 int
1833 hammer_vop_strategy_write(struct vop_strategy_args *ap)
1834 {
1835         hammer_inode_t ip;
1836         struct bio *bio;
1837         struct buf *bp;
1838
1839         bio = ap->a_bio;
1840         bp = bio->bio_buf;
1841         ip = ap->a_vp->v_data;
1842
1843         if (ip->flags & HAMMER_INODE_RO) {
1844                 bp->b_error = EROFS;
1845                 bp->b_flags |= B_ERROR;
1846                 biodone(ap->a_bio);
1847                 return(EROFS);
1848         }
1849
1850         /*
1851          * If the inode is being flushed we cannot re-queue buffers
1852          * it may have already flushed, or it could result in duplicate
1853          * records in the database.
1854          */
1855         BUF_KERNPROC(bp);
1856         if (ip->flags & HAMMER_INODE_WRITE_ALT)
1857                 TAILQ_INSERT_TAIL(&ip->bio_alt_list, bio, bio_act);
1858         else
1859                 TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act);
1860         ++hammer_bio_count;
1861         hammer_modify_inode(NULL, ip, HAMMER_INODE_BUFS);
1862
1863         hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1864 #if 0
1865         /*
1866          * XXX 
1867          *
1868          * If the write was not part of an integrated flush operation then
1869          * signal a flush.
1870          */
1871         if (ip->flush_state != HAMMER_FST_FLUSH ||
1872             (ip->flags & HAMMER_INODE_WRITE_ALT)) {
1873                 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1874         }
1875 #endif
1876         return(0);
1877 }
1878
1879 /*
1880  * Backend code which actually performs the write to the media.  This
1881  * routine is typically called from the flusher.  The bio will be disposed
1882  * of (biodone'd) by this routine.
1883  *
1884  * Iterate the related records and mark for deletion.  If existing edge
1885  * records (left and right side) overlap our write they have to be marked
1886  * deleted and new records created, usually referencing a portion of the
1887  * original data.  Then add a record to represent the buffer.
1888  */
1889 int
1890 hammer_dowrite(hammer_cursor_t cursor, hammer_inode_t ip, struct bio *bio)
1891 {
1892         struct buf *bp = bio->bio_buf;
1893         int error;
1894
1895         KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
1896
1897         /*
1898          * If the inode is going or gone, just throw away any frontend
1899          * buffers.
1900          */
1901         if (ip->flags & HAMMER_INODE_DELETED) {
1902                 bp->b_resid = 0;
1903                 biodone(bio);
1904         }
1905
1906         /*
1907          * Delete any records overlapping our range.  This function will
1908          * (eventually) properly truncate partial overlaps.
1909          */
1910         if (ip->sync_ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
1911                 error = hammer_ip_delete_range(cursor, ip, bio->bio_offset,
1912                                                bio->bio_offset);
1913         } else {
1914                 error = hammer_ip_delete_range(cursor, ip, bio->bio_offset,
1915                                                bio->bio_offset +
1916                                                 bp->b_bufsize - 1);
1917         }
1918
1919         /*
1920          * Add a single record to cover the write.  We can write a record
1921          * with only the actual file data - for example, a small 200 byte
1922          * file does not have to write out a 16K record.
1923          *
1924          * While the data size does not have to be aligned, we still do it
1925          * to reduce fragmentation in a future allocation model.
1926          */
1927         if (error == 0) {
1928                 int limit_size;
1929
1930                 if (ip->sync_ino_rec.ino_size - bio->bio_offset > 
1931                     bp->b_bufsize) {
1932                             limit_size = bp->b_bufsize;
1933                 } else {
1934                         limit_size = (int)(ip->sync_ino_rec.ino_size -
1935                                            bio->bio_offset);
1936                         KKASSERT(limit_size >= 0);
1937                         limit_size = (limit_size + 63) & ~63;
1938                 }
1939                 if (limit_size) {
1940                         error = hammer_ip_sync_data(cursor, ip, bio->bio_offset,
1941                                                     bp->b_data, limit_size);
1942                 }
1943         }
1944         if (error)
1945                 Debugger("hammer_dowrite: error");
1946
1947         if (error) {
1948                 bp->b_resid = bp->b_bufsize;
1949                 bp->b_error = error;
1950                 bp->b_flags |= B_ERROR;
1951         } else {
1952                 bp->b_resid = 0;
1953         }
1954         biodone(bio);
1955         --hammer_bio_count;
1956         return(error);
1957 }
1958
1959 /*
1960  * dounlink - disconnect a directory entry
1961  *
1962  * XXX whiteout support not really in yet
1963  */
static int
hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
                struct vnode *dvp, struct ucred *cred, int flags)
{
        struct namecache *ncp;
        hammer_inode_t dip;     /* directory being modified */
        hammer_inode_t ip;      /* inode being unlinked */
        hammer_record_ondisk_t rec;
        struct hammer_cursor cursor;
        int64_t namekey;
        int error;

        /*
         * Calculate the namekey and setup the key range for the scan.  This
         * works kinda like a chained hash table where the lower 32 bits
         * of the namekey synthesize the chain.
         *
         * The key range is inclusive of both key_beg and key_end.
         */
        dip = VTOI(dvp);
        ncp = nch->ncp;

        if (dip->flags & HAMMER_INODE_RO)
                return (EROFS);

        namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
retry:
        /*
         * Re-entered from below when a cursor deadlock (EDEADLK) forces
         * the whole operation to restart from scratch.
         */
        hammer_init_cursor(trans, &cursor, &dip->cache[0], dip);
        cursor.key_beg.obj_id = dip->obj_id;
        cursor.key_beg.key = namekey;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
        cursor.key_beg.obj_type = 0;

        /* end of the hash chain: same upper 32 bits, maximal lower 32 */
        cursor.key_end = cursor.key_beg;
        cursor.key_end.key |= 0xFFFFFFFFULL;
        cursor.asof = dip->obj_asof;
        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

        /*
         * Scan all matching records (the chain), locate the one matching
         * the requested path component.  On search termination error
         * contains the code and could be 0, ENOENT, or something else.
         *
         * The hammer_ip_*() functions merge in-memory records with on-disk
         * records for the purposes of the search.
         */
        rec = NULL;
        error = hammer_ip_first(&cursor);

        while (error == 0) {
                error = hammer_ip_resolve_data(&cursor);
                if (error)
                        break;
                rec = cursor.record;
                /* compare length first, then the name bytes themselves */
                if (ncp->nc_nlen == rec->entry.base.data_len &&
                    bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) {
                        break;
                }
                error = hammer_ip_next(&cursor);
        }

        /*
         * If all is ok we have to get the inode so we can adjust nlinks.
         *
         * If the target is a directory, it must be empty.
         */
        if (error == 0) {
                ip = hammer_get_inode(trans, &dip->cache[1],
                                      rec->entry.obj_id,
                                      dip->hmp->asof, 0, &error);
                if (error == ENOENT) {
                        /* directory entry exists but its inode does not:
                         * filesystem inconsistency */
                        kprintf("obj_id %016llx\n", rec->entry.obj_id);
                        Debugger("ENOENT unlinking object that should exist");
                }

                /*
                 * If we are trying to remove a directory the directory must
                 * be empty.
                 *
                 * WARNING: hammer_ip_check_directory_empty() may have to
                 * terminate the cursor to avoid a deadlock.  It is ok to
                 * call hammer_done_cursor() twice.
                 */
                if (error == 0 && ip->ino_rec.base.base.obj_type ==
                                  HAMMER_OBJTYPE_DIRECTORY) {
                        error = hammer_ip_check_directory_empty(trans, &cursor,
                                                                ip);
                }

                /*
                 * Delete the directory entry.
                 *
                 * WARNING: hammer_ip_del_directory() may have to terminate
                 * the cursor to avoid a deadlock.  It is ok to call
                 * hammer_done_cursor() twice.
                 */
                if (error == 0) {
                        error = hammer_ip_del_directory(trans, &cursor,
                                                        dip, ip);
                }
                if (error == 0) {
                        /* invalidate name-cache state for the removed entry */
                        cache_setunresolved(nch);
                        cache_setvp(nch, NULL);
                        /* XXX locking */
                        if (ip->vp)
                                cache_inval_vp(ip->vp, CINV_DESTROY);
                }
                hammer_rel_inode(ip, 0);
        }
        hammer_done_cursor(&cursor);
        if (error == EDEADLK)
                goto retry;

        return (error);
}
2082
2083 /************************************************************************
2084  *                          FIFO AND SPECFS OPS                         *
2085  ************************************************************************
2086  *
2087  */
2088
2089 static int
2090 hammer_vop_fifoclose (struct vop_close_args *ap)
2091 {
2092         /* XXX update itimes */
2093         return (VOCALL(&fifo_vnode_vops, &ap->a_head));
2094 }
2095
2096 static int
2097 hammer_vop_fiforead (struct vop_read_args *ap)
2098 {
2099         int error;
2100
2101         error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2102         /* XXX update access time */
2103         return (error);
2104 }
2105
2106 static int
2107 hammer_vop_fifowrite (struct vop_write_args *ap)
2108 {
2109         int error;
2110
2111         error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2112         /* XXX update access time */
2113         return (error);
2114 }
2115
2116 static int
2117 hammer_vop_specclose (struct vop_close_args *ap)
2118 {
2119         /* XXX update itimes */
2120         return (VOCALL(&spec_vnode_vops, &ap->a_head));
2121 }
2122
2123 static int
2124 hammer_vop_specread (struct vop_read_args *ap)
2125 {
2126         /* XXX update access time */
2127         return (VOCALL(&spec_vnode_vops, &ap->a_head));
2128 }
2129
2130 static int
2131 hammer_vop_specwrite (struct vop_write_args *ap)
2132 {
2133         /* XXX update last change time */
2134         return (VOCALL(&spec_vnode_vops, &ap->a_head));
2135 }
2136