HAMMER 25/many: Pruning code
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.26 2008/02/05 07:58:43 dillon Exp $
35  */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/namecache.h>
42 #include <sys/vnode.h>
43 #include <sys/lockf.h>
44 #include <sys/event.h>
45 #include <sys/stat.h>
46 #include <sys/dirent.h>
47 #include <vm/vm_extern.h>
48 #include <vfs/fifofs/fifo.h>
49 #include "hammer.h"
50
51 /*
52  * USERFS VNOPS
53  */
54 /*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
55 static int hammer_vop_fsync(struct vop_fsync_args *);
56 static int hammer_vop_read(struct vop_read_args *);
57 static int hammer_vop_write(struct vop_write_args *);
58 static int hammer_vop_access(struct vop_access_args *);
59 static int hammer_vop_advlock(struct vop_advlock_args *);
60 static int hammer_vop_close(struct vop_close_args *);
61 static int hammer_vop_ncreate(struct vop_ncreate_args *);
62 static int hammer_vop_getattr(struct vop_getattr_args *);
63 static int hammer_vop_nresolve(struct vop_nresolve_args *);
64 static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *);
65 static int hammer_vop_nlink(struct vop_nlink_args *);
66 static int hammer_vop_nmkdir(struct vop_nmkdir_args *);
67 static int hammer_vop_nmknod(struct vop_nmknod_args *);
68 static int hammer_vop_open(struct vop_open_args *);
69 static int hammer_vop_pathconf(struct vop_pathconf_args *);
70 static int hammer_vop_print(struct vop_print_args *);
71 static int hammer_vop_readdir(struct vop_readdir_args *);
72 static int hammer_vop_readlink(struct vop_readlink_args *);
73 static int hammer_vop_nremove(struct vop_nremove_args *);
74 static int hammer_vop_nrename(struct vop_nrename_args *);
75 static int hammer_vop_nrmdir(struct vop_nrmdir_args *);
76 static int hammer_vop_setattr(struct vop_setattr_args *);
77 static int hammer_vop_strategy(struct vop_strategy_args *);
78 static int hammer_vop_nsymlink(struct vop_nsymlink_args *);
79 static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
80 static int hammer_vop_ioctl(struct vop_ioctl_args *);
81
82 static int hammer_vop_fifoclose (struct vop_close_args *);
83 static int hammer_vop_fiforead (struct vop_read_args *);
84 static int hammer_vop_fifowrite (struct vop_write_args *);
85
86 static int hammer_vop_specclose (struct vop_close_args *);
87 static int hammer_vop_specread (struct vop_read_args *);
88 static int hammer_vop_specwrite (struct vop_write_args *);
89
90 struct vop_ops hammer_vnode_vops = {
91         .vop_default =          vop_defaultop,
92         .vop_fsync =            hammer_vop_fsync,
93         .vop_getpages =         vop_stdgetpages,
94         .vop_putpages =         vop_stdputpages,
95         .vop_read =             hammer_vop_read,
96         .vop_write =            hammer_vop_write,
97         .vop_access =           hammer_vop_access,
98         .vop_advlock =          hammer_vop_advlock,
99         .vop_close =            hammer_vop_close,
100         .vop_ncreate =          hammer_vop_ncreate,
101         .vop_getattr =          hammer_vop_getattr,
102         .vop_inactive =         hammer_vop_inactive,
103         .vop_reclaim =          hammer_vop_reclaim,
104         .vop_nresolve =         hammer_vop_nresolve,
105         .vop_nlookupdotdot =    hammer_vop_nlookupdotdot,
106         .vop_nlink =            hammer_vop_nlink,
107         .vop_nmkdir =           hammer_vop_nmkdir,
108         .vop_nmknod =           hammer_vop_nmknod,
109         .vop_open =             hammer_vop_open,
110         .vop_pathconf =         hammer_vop_pathconf,
111         .vop_print =            hammer_vop_print,
112         .vop_readdir =          hammer_vop_readdir,
113         .vop_readlink =         hammer_vop_readlink,
114         .vop_nremove =          hammer_vop_nremove,
115         .vop_nrename =          hammer_vop_nrename,
116         .vop_nrmdir =           hammer_vop_nrmdir,
117         .vop_setattr =          hammer_vop_setattr,
118         .vop_strategy =         hammer_vop_strategy,
119         .vop_nsymlink =         hammer_vop_nsymlink,
120         .vop_nwhiteout =        hammer_vop_nwhiteout,
121         .vop_ioctl =            hammer_vop_ioctl
122 };
123
124 struct vop_ops hammer_spec_vops = {
125         .vop_default =          spec_vnoperate,
126         .vop_fsync =            hammer_vop_fsync,
127         .vop_read =             hammer_vop_specread,
128         .vop_write =            hammer_vop_specwrite,
129         .vop_access =           hammer_vop_access,
130         .vop_close =            hammer_vop_specclose,
131         .vop_getattr =          hammer_vop_getattr,
132         .vop_inactive =         hammer_vop_inactive,
133         .vop_reclaim =          hammer_vop_reclaim,
134         .vop_setattr =          hammer_vop_setattr
135 };
136
137 struct vop_ops hammer_fifo_vops = {
138         .vop_default =          fifo_vnoperate,
139         .vop_fsync =            hammer_vop_fsync,
140         .vop_read =             hammer_vop_fiforead,
141         .vop_write =            hammer_vop_fifowrite,
142         .vop_access =           hammer_vop_access,
143         .vop_close =            hammer_vop_fifoclose,
144         .vop_getattr =          hammer_vop_getattr,
145         .vop_inactive =         hammer_vop_inactive,
146         .vop_reclaim =          hammer_vop_reclaim,
147         .vop_setattr =          hammer_vop_setattr
148 };
149
150 static int hammer_dounlink(struct nchandle *nch, struct vnode *dvp,
151                            struct ucred *cred, int flags);
152 static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
153 static int hammer_vop_strategy_write(struct vop_strategy_args *ap);
154
155 #if 0
156 static
157 int
158 hammer_vop_vnoperate(struct vop_generic_args *ap)
159 {
160         return (VOCALL(&hammer_vnode_vops, ap));
161 }
162 #endif
163
164 /*
165  * hammer_vop_fsync { vp, waitfor }
166  */
167 static
168 int
169 hammer_vop_fsync(struct vop_fsync_args *ap)
170 {
171         hammer_inode_t ip;
172         int error;
173
174         ip = VTOI(ap->a_vp);
175         error = hammer_sync_inode(ip, ap->a_waitfor, 0);
176         return (error);
177 }
178
179 /*
180  * hammer_vop_read { vp, uio, ioflag, cred }
181  */
182 static
183 int
184 hammer_vop_read(struct vop_read_args *ap)
185 {
186         struct hammer_transaction trans;
187         hammer_inode_t ip;
188         off_t offset;
189         struct buf *bp;
190         struct uio *uio;
191         int error;
192         int n;
193         int seqcount;
194
195         if (ap->a_vp->v_type != VREG)
196                 return (EINVAL);
197         ip = VTOI(ap->a_vp);
198         error = 0;
199         seqcount = ap->a_ioflag >> 16;
200
201         hammer_start_transaction(&trans, ip->hmp);
202
203         /*
204          * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
205          */
206         uio = ap->a_uio;
207         while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_rec.ino_size) {
208                 offset = uio->uio_offset & HAMMER_BUFMASK;
209 #if 0
210                 error = cluster_read(ap->a_vp, ip->ino_rec.ino_size,
211                                      uio->uio_offset - offset, HAMMER_BUFSIZE,
212                                      MAXBSIZE, seqcount, &bp);
213 #endif
214                 error = bread(ap->a_vp, uio->uio_offset - offset,
215                               HAMMER_BUFSIZE, &bp);
216                 if (error) {
217                         brelse(bp);
218                         break;
219                 }
220                 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
221                 n = HAMMER_BUFSIZE - offset;
222                 if (n > uio->uio_resid)
223                         n = uio->uio_resid;
224                 if (n > ip->ino_rec.ino_size - uio->uio_offset)
225                         n = (int)(ip->ino_rec.ino_size - uio->uio_offset);
226                 error = uiomove((char *)bp->b_data + offset, n, uio);
227                 if (error) {
228                         bqrelse(bp);
229                         break;
230                 }
231                 if ((ip->flags & HAMMER_INODE_RO) == 0) {
232                         ip->ino_rec.ino_atime = trans.tid;
233                         hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES);
234                 }
235                 bqrelse(bp);
236         }
237         hammer_commit_transaction(&trans);
238         return (error);
239 }
240
241 /*
242  * hammer_vop_write { vp, uio, ioflag, cred }
243  */
244 static
245 int
246 hammer_vop_write(struct vop_write_args *ap)
247 {
248         struct hammer_transaction trans;
249         struct hammer_inode *ip;
250         struct uio *uio;
251         off_t offset;
252         struct buf *bp;
253         int error;
254         int n;
255         int flags;
256
257         if (ap->a_vp->v_type != VREG)
258                 return (EINVAL);
259         ip = VTOI(ap->a_vp);
260         error = 0;
261
262         if (ip->flags & HAMMER_INODE_RO)
263                 return (EROFS);
264
265         /*
266          * Create a transaction to cover the operations we perform.
267          */
268         hammer_start_transaction(&trans, ip->hmp);
269         uio = ap->a_uio;
270
271         /*
272          * Check append mode
273          */
274         if (ap->a_ioflag & IO_APPEND)
275                 uio->uio_offset = ip->ino_rec.ino_size;
276
277         /*
278          * Check for illegal write offsets.  Valid range is 0...2^63-1
279          */
280         if (uio->uio_offset < 0 || uio->uio_offset + uio->uio_resid <= 0) {
281                 hammer_commit_transaction(&trans);
282                 return (EFBIG);
283         }
284
285         /*
286          * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
287          */
288         while (uio->uio_resid > 0) {
289                 int fixsize = 0;
290
291                 offset = uio->uio_offset & HAMMER_BUFMASK;
292                 n = HAMMER_BUFSIZE - offset;
293                 if (n > uio->uio_resid)
294                         n = uio->uio_resid;
295                 if (uio->uio_offset + n > ip->ino_rec.ino_size) {
296                         vnode_pager_setsize(ap->a_vp, uio->uio_offset + n);
297                         fixsize = 1;
298                 }
299
300                 if (uio->uio_segflg == UIO_NOCOPY) {
301                         /*
302                          * Issuing a write with the same data backing the
303                          * buffer.  Instantiate the buffer to collect the
304                          * backing vm pages, then read-in any missing bits.
305                          *
306                          * This case is used by vop_stdputpages().
307                          */
308                         bp = getblk(ap->a_vp, uio->uio_offset - offset,
309                                     HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
310                         if ((bp->b_flags & B_CACHE) == 0) {
311                                 bqrelse(bp);
312                                 error = bread(ap->a_vp,
313                                               uio->uio_offset - offset,
314                                               HAMMER_BUFSIZE, &bp);
315                         }
316                 } else if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
317                         /*
318                          * entirely overwrite the buffer
319                          */
320                         bp = getblk(ap->a_vp, uio->uio_offset - offset,
321                                     HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
322                 } else if (offset == 0 && uio->uio_offset >= ip->ino_rec.ino_size) {
323                         /*
324                          * Write at or beyond EOF; no read needed, just clear the buffer
325                          */
326                         bp = getblk(ap->a_vp, uio->uio_offset - offset,
327                                     HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
328                         vfs_bio_clrbuf(bp);
329                 } else {
330                         /*
331                          * Partial overwrite, read in any missing bits then
332                          * replace the portion being written.
333                          */
334                         error = bread(ap->a_vp, uio->uio_offset - offset,
335                                       HAMMER_BUFSIZE, &bp);
336                         if (error == 0)
337                                 bheavy(bp);
338                 }
339                 if (error == 0)
340                         error = uiomove((char *)bp->b_data + offset, n, uio);
341
342                 /*
343                  * If we screwed up we have to undo any VM size changes we
344                  * made.
345                  */
346                 if (error) {
347                         brelse(bp);
348                         if (fixsize) {
349                                 vtruncbuf(ap->a_vp, ip->ino_rec.ino_size,
350                                           HAMMER_BUFSIZE);
351                         }
352                         break;
353                 }
354                 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
355                 if (ip->ino_rec.ino_size < uio->uio_offset) {
356                         ip->ino_rec.ino_size = uio->uio_offset;
357                         flags = HAMMER_INODE_RDIRTY;
358                         vnode_pager_setsize(ap->a_vp, ip->ino_rec.ino_size);
359                 } else {
360                         flags = 0;
361                 }
362                 ip->ino_rec.ino_mtime = trans.tid;
363                 flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS;
364                 hammer_modify_inode(&trans, ip, flags);
365
366                 /*
367                  * The file write must be tagged with the same TID as the
368                  * inode, for consistency in case the inode changed size.
369                  * This guarantees the on-disk data records will have a
370                  * TID <= the inode TID representing the size change.
371                  *
372                  * If a prior write has not yet flushed, retain its TID.
373                  */
374                 if (bp->b_tid == 0)
375                         bp->b_tid = ip->last_tid;
376
377                 if (ap->a_ioflag & IO_SYNC) {
378                         bwrite(bp);
379                 } else if (ap->a_ioflag & IO_DIRECT) {
380                         bawrite(bp);
381                 } else {
382                         bdwrite(bp);
383                 }
384         }
385         if (error)
386                 hammer_abort_transaction(&trans);
387         else
388                 hammer_commit_transaction(&trans);
389         return (error);
390 }
391
392 /*
393  * hammer_vop_access { vp, mode, cred }
394  */
395 static
396 int
397 hammer_vop_access(struct vop_access_args *ap)
398 {
399         struct hammer_inode *ip = VTOI(ap->a_vp);
400         uid_t uid;
401         gid_t gid;
402         int error;
403
404         uid = hammer_to_unix_xid(&ip->ino_data.uid);
405         gid = hammer_to_unix_xid(&ip->ino_data.gid);
406
407         error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
408                                   ip->ino_data.uflags);
409         return (error);
410 }
411
412 /*
413  * hammer_vop_advlock { vp, id, op, fl, flags }
414  */
415 static
416 int
417 hammer_vop_advlock(struct vop_advlock_args *ap)
418 {
419         struct hammer_inode *ip = VTOI(ap->a_vp);
420
421         return (lf_advlock(ap, &ip->advlock, ip->ino_rec.ino_size));
422 }
423
424 /*
425  * hammer_vop_close { vp, fflag }
426  */
427 static
428 int
429 hammer_vop_close(struct vop_close_args *ap)
430 {
431         return (vop_stdclose(ap));
432 }
433
434 /*
435  * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
436  *
437  * The operating system has already ensured that the directory entry
438  * does not exist and done all appropriate namespace locking.
439  */
440 static
441 int
442 hammer_vop_ncreate(struct vop_ncreate_args *ap)
443 {
444         struct hammer_transaction trans;
445         struct hammer_inode *dip;
446         struct hammer_inode *nip;
447         struct nchandle *nch;
448         int error;
449
450         nch = ap->a_nch;
451         dip = VTOI(ap->a_dvp);
452
453         if (dip->flags & HAMMER_INODE_RO)
454                 return (EROFS);
455
456         /*
457          * Create a transaction to cover the operations we perform.
458          */
459         hammer_start_transaction(&trans, dip->hmp);
460
461         /*
462          * Create a new filesystem object of the requested type.  The
463          * returned inode will be referenced but not locked.
464          */
465
466         error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
467         if (error)
468                 kprintf("hammer_create_inode error %d\n", error);
469         if (error) {
470                 hammer_abort_transaction(&trans);
471                 *ap->a_vpp = NULL;
472                 return (error);
473         }
474
475         /*
476          * Add the new filesystem object to the directory.  This will also
477          * bump the inode's link count.
478          */
479         error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
480         if (error)
481                 kprintf("hammer_ip_add_directory error %d\n", error);
482
483         /*
484          * Finish up.
485          */
486         if (error) {
487                 hammer_rel_inode(nip, 0);
488                 hammer_abort_transaction(&trans);
489                 *ap->a_vpp = NULL;
490         } else {
491                 hammer_commit_transaction(&trans);
492                 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
493                 hammer_rel_inode(nip, 0);
494                 if (error == 0) {
495                         cache_setunresolved(ap->a_nch);
496                         cache_setvp(ap->a_nch, *ap->a_vpp);
497                 }
498         }
499         return (error);
500 }
501
502 /*
503  * hammer_vop_getattr { vp, vap }
504  */
505 static
506 int
507 hammer_vop_getattr(struct vop_getattr_args *ap)
508 {
509         struct hammer_inode *ip = VTOI(ap->a_vp);
510         struct vattr *vap = ap->a_vap;
511
512 #if 0
513         if (cache_check_fsmid_vp(ap->a_vp, &ip->fsmid) &&
514             (vp->v_mount->mnt_flag & MNT_RDONLY) == 0 &&
515             ip->obj_asof == XXX
516         ) {
517                 /* LAZYMOD XXX */
518         }
519         hammer_itimes(ap->a_vp);
520 #endif
521
522         vap->va_fsid = ip->hmp->fsid_udev;
523         vap->va_fileid = ip->ino_rec.base.base.obj_id;
524         vap->va_mode = ip->ino_data.mode;
525         vap->va_nlink = ip->ino_rec.ino_nlinks;
526         vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
527         vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
528         vap->va_rmajor = 0;
529         vap->va_rminor = 0;
530         vap->va_size = ip->ino_rec.ino_size;
531         hammer_to_timespec(ip->ino_rec.ino_atime, &vap->va_atime);
532         hammer_to_timespec(ip->ino_rec.ino_mtime, &vap->va_mtime);
533         hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
534         vap->va_flags = ip->ino_data.uflags;
535         vap->va_gen = 1;        /* hammer inums are unique for all time */
536         vap->va_blocksize = 32768; /* XXX - extract from root volume */
537         vap->va_bytes = ip->ino_rec.ino_size;
538         vap->va_type = hammer_get_vnode_type(ip->ino_rec.base.base.obj_type);
539         vap->va_filerev = 0;    /* XXX */
540         /* mtime uniquely identifies any adjustments made to the file */
541         vap->va_fsmid = ip->ino_rec.ino_mtime;
542         vap->va_uid_uuid = ip->ino_data.uid;
543         vap->va_gid_uuid = ip->ino_data.gid;
544         vap->va_fsid_uuid = ip->hmp->fsid;
545         vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
546                           VA_FSID_UUID_VALID;
547
548         switch (ip->ino_rec.base.base.obj_type) {
549         case HAMMER_OBJTYPE_CDEV:
550         case HAMMER_OBJTYPE_BDEV:
551                 vap->va_rmajor = ip->ino_data.rmajor;
552                 vap->va_rminor = ip->ino_data.rminor;
553                 break;
554         default:
555                 break;
556         }
557
558         return(0);
559 }
560
561 /*
562  * hammer_vop_nresolve { nch, dvp, cred }
563  *
564  * Locate the requested directory entry.
565  */
566 static
567 int
568 hammer_vop_nresolve(struct vop_nresolve_args *ap)
569 {
570         struct namecache *ncp;
571         hammer_inode_t dip;
572         hammer_inode_t ip;
573         hammer_tid_t asof;
574         struct hammer_cursor cursor;
575         union hammer_record_ondisk *rec;
576         struct vnode *vp;
577         int64_t namekey;
578         int error;
579         int i;
580         int nlen;
581         int flags;
582         u_int64_t obj_id;
583
584         /*
585          * Misc initialization, plus handle as-of name extensions.  Look for
586          * the '@@' extension.  Note that as-of files and directories cannot
587          * be modified.
588          */
589         dip = VTOI(ap->a_dvp);
590         ncp = ap->a_nch->ncp;
591         asof = dip->obj_asof;
592         nlen = ncp->nc_nlen;
593         flags = dip->flags;
594
595         for (i = 0; i < nlen; ++i) {
596                 if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
597                         asof = hammer_str_to_tid(ncp->nc_name + i + 2);
598                         flags |= HAMMER_INODE_RO;
599                         break;
600                 }
601         }
602         nlen = i;
603
604         /*
605          * If there is no path component the time extension is relative to
606          * dip.
607          */
608         if (nlen == 0) {
609                 ip = hammer_get_inode(dip->hmp, &dip->cache[1], dip->obj_id,
610                                       asof, flags, &error);
611                 if (error == 0) {
612                         error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
613                         hammer_rel_inode(ip, 0);
614                 } else {
615                         vp = NULL;
616                 }
617                 if (error == 0) {
618                         vn_unlock(vp);
619                         cache_setvp(ap->a_nch, vp);
620                         vrele(vp);
621                 }
622                 return(error);
623         }
624
625         /*
626          * Calculate the namekey and setup the key range for the scan.  This
627          * works kinda like a chained hash table where the lower 32 bits
628          * of the namekey synthesize the chain.
629          *
630          * The key range is inclusive of both key_beg and key_end.
631          */
632         namekey = hammer_directory_namekey(ncp->nc_name, nlen);
633
634         error = hammer_init_cursor_hmp(&cursor, &dip->cache[0], dip->hmp);
635         cursor.key_beg.obj_id = dip->obj_id;
636         cursor.key_beg.key = namekey;
637         cursor.key_beg.create_tid = 0;
638         cursor.key_beg.delete_tid = 0;
639         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
640         cursor.key_beg.obj_type = 0;
641
642         cursor.key_end = cursor.key_beg;
643         cursor.key_end.key |= 0xFFFFFFFFULL;
644         cursor.asof = asof;
645         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
646
647         /*
648          * Scan all matching records (the chain), locate the one matching
649          * the requested path component.
650          *
651          * The hammer_ip_*() functions merge in-memory records with on-disk
652          * records for the purposes of the search.
653          */
654         if (error == 0)
655                 error = hammer_ip_first(&cursor, dip);
656
657         rec = NULL;
658         obj_id = 0;
659
660         while (error == 0) {
661                 error = hammer_ip_resolve_data(&cursor);
662                 if (error)
663                         break;
664                 rec = cursor.record;
665                 if (nlen == rec->entry.base.data_len &&
666                     bcmp(ncp->nc_name, cursor.data, nlen) == 0) {
667                         obj_id = rec->entry.obj_id;
668                         break;
669                 }
670                 error = hammer_ip_next(&cursor);
671         }
672         hammer_done_cursor(&cursor);
673         if (error == 0) {
674                 ip = hammer_get_inode(dip->hmp, &dip->cache[1],
675                                       obj_id, asof, flags, &error);
676                 if (error == 0) {
677                         error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
678                         hammer_rel_inode(ip, 0);
679                 } else {
680                         vp = NULL;
681                 }
682                 if (error == 0) {
683                         vn_unlock(vp);
684                         cache_setvp(ap->a_nch, vp);
685                         vrele(vp);
686                 }
687         } else if (error == ENOENT) {
688                 cache_setvp(ap->a_nch, NULL);
689         }
690         return (error);
691 }
692
693 /*
694  * hammer_vop_nlookupdotdot { dvp, vpp, cred }
695  *
696  * Locate the parent directory of a directory vnode.
697  *
698  * dvp is referenced but not locked.  *vpp must be returned referenced and
699  * locked.  A parent_obj_id of 0 does not necessarily indicate that we are
700  * at the root; instead it could indicate that the directory we were in was
701  * removed.
702  */
703 static
704 int
705 hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
706 {
707         struct hammer_inode *dip;
708         struct hammer_inode *ip;
709         u_int64_t parent_obj_id;
710         int error;
711
712         dip = VTOI(ap->a_dvp);
713         if ((parent_obj_id = dip->ino_data.parent_obj_id) == 0) {
714                 *ap->a_vpp = NULL;
715                 return ENOENT;
716         }
717
718         ip = hammer_get_inode(dip->hmp, &dip->cache[1], parent_obj_id,
719                               dip->obj_asof, dip->flags, &error);
720         if (ip == NULL) {
721                 *ap->a_vpp = NULL;
722                 return(error);
723         }
724         error = hammer_get_vnode(ip, LK_EXCLUSIVE, ap->a_vpp);
725         hammer_rel_inode(ip, 0);
726         return (error);
727 }
728
729 /*
730  * hammer_vop_nlink { nch, dvp, vp, cred }
731  */
732 static
733 int
734 hammer_vop_nlink(struct vop_nlink_args *ap)
735 {
736         struct hammer_transaction trans;
737         struct hammer_inode *dip;
738         struct hammer_inode *ip;
739         struct nchandle *nch;
740         int error;
741
742         nch = ap->a_nch;
743         dip = VTOI(ap->a_dvp);
744         ip = VTOI(ap->a_vp);
745
746         if (dip->flags & HAMMER_INODE_RO)
747                 return (EROFS);
748         if (ip->flags & HAMMER_INODE_RO)
749                 return (EROFS);
750
751         /*
752          * Create a transaction to cover the operations we perform.
753          */
754         hammer_start_transaction(&trans, dip->hmp);
755
756         /*
757          * Add the filesystem object to the directory.  Note that neither
758          * dip nor ip are referenced or locked, but their vnodes are
759          * referenced.  This function will bump the inode's link count.
760          */
761         error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip);
762
763         /*
764          * Finish up.
765          */
766         if (error) {
767                 hammer_abort_transaction(&trans);
768         } else {
769                 cache_setunresolved(nch);
770                 cache_setvp(nch, ap->a_vp);
771                 hammer_commit_transaction(&trans);
772         }
773         return (error);
774 }
775
776 /*
777  * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
778  *
779  * The operating system has already ensured that the directory entry
780  * does not exist and done all appropriate namespace locking.
781  */
782 static
783 int
784 hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
785 {
786         struct hammer_transaction trans;
787         struct hammer_inode *dip;
788         struct hammer_inode *nip;
789         struct nchandle *nch;
790         int error;
791
792         nch = ap->a_nch;
793         dip = VTOI(ap->a_dvp);
794
795         if (dip->flags & HAMMER_INODE_RO)
796                 return (EROFS);
797
798         /*
799          * Create a transaction to cover the operations we perform.
800          */
801         hammer_start_transaction(&trans, dip->hmp);
802
803         /*
804          * Create a new filesystem object of the requested type.  The
805          * returned inode will be referenced but not locked.
806          */
807         error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
808         if (error)
809                 kprintf("hammer_mkdir error %d\n", error);
810         if (error) {
811                 hammer_abort_transaction(&trans);
812                 *ap->a_vpp = NULL;
813                 return (error);
814         }
815
816         /*
817          * Add the new filesystem object to the directory.  This will also
818          * bump the inode's link count.
819          */
820         error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
821         if (error)
822                 kprintf("hammer_mkdir (add) error %d\n", error);
823
824         /*
825          * Finish up.
826          */
827         if (error) {
828                 hammer_rel_inode(nip, 0);
829                 hammer_abort_transaction(&trans);
830                 *ap->a_vpp = NULL;
831         } else {
832                 hammer_commit_transaction(&trans);
833                 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
834                 hammer_rel_inode(nip, 0);
835                 if (error == 0) {
836                         cache_setunresolved(ap->a_nch);
837                         cache_setvp(ap->a_nch, *ap->a_vpp);
838                 }
839         }
840         return (error);
841 }
842
843 /*
844  * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
845  *
846  * The operating system has already ensured that the directory entry
847  * does not exist and done all appropriate namespace locking.
848  */
849 static
850 int
851 hammer_vop_nmknod(struct vop_nmknod_args *ap)
852 {
853         struct hammer_transaction trans;
854         struct hammer_inode *dip;
855         struct hammer_inode *nip;
856         struct nchandle *nch;
857         int error;
858
859         nch = ap->a_nch;
860         dip = VTOI(ap->a_dvp);
861
862         if (dip->flags & HAMMER_INODE_RO)
863                 return (EROFS);
864
865         /*
866          * Create a transaction to cover the operations we perform.
867          */
868         hammer_start_transaction(&trans, dip->hmp);
869
870         /*
871          * Create a new filesystem object of the requested type.  The
872          * returned inode will be referenced but not locked.
873          */
874         error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
875         if (error) {
876                 hammer_abort_transaction(&trans);
877                 *ap->a_vpp = NULL;
878                 return (error);
879         }
880
881         /*
882          * Add the new filesystem object to the directory.  This will also
883          * bump the inode's link count.
884          */
885         error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
886
887         /*
888          * Finish up.
889          */
890         if (error) {
891                 hammer_rel_inode(nip, 0);
892                 hammer_abort_transaction(&trans);
893                 *ap->a_vpp = NULL;
894         } else {
895                 hammer_commit_transaction(&trans);
896                 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
897                 hammer_rel_inode(nip, 0);
898                 if (error == 0) {
899                         cache_setunresolved(ap->a_nch);
900                         cache_setvp(ap->a_nch, *ap->a_vpp);
901                 }
902         }
903         return (error);
904 }
905
906 /*
907  * hammer_vop_open { vp, mode, cred, fp }
908  */
909 static
910 int
911 hammer_vop_open(struct vop_open_args *ap)
912 {
913         if ((ap->a_mode & FWRITE) && (VTOI(ap->a_vp)->flags & HAMMER_INODE_RO))
914                 return (EROFS);
915
916         return(vop_stdopen(ap));
917 }
918
919 /*
920  * hammer_vop_pathconf { vp, name, retval }
921  */
922 static
923 int
924 hammer_vop_pathconf(struct vop_pathconf_args *ap)
925 {
926         return EOPNOTSUPP;
927 }
928
929 /*
930  * hammer_vop_print { vp }
931  */
932 static
933 int
934 hammer_vop_print(struct vop_print_args *ap)
935 {
936         return EOPNOTSUPP;
937 }
938
939 /*
940  * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
941  */
942 static
943 int
944 hammer_vop_readdir(struct vop_readdir_args *ap)
945 {
946         struct hammer_cursor cursor;
947         struct hammer_inode *ip;
948         struct uio *uio;
949         hammer_record_ondisk_t rec;
950         hammer_base_elm_t base;
951         int error;
952         int cookie_index;
953         int ncookies;
954         off_t *cookies;
955         off_t saveoff;
956         int r;
957
958         ip = VTOI(ap->a_vp);
959         uio = ap->a_uio;
960         saveoff = uio->uio_offset;
961
962         if (ap->a_ncookies) {
963                 ncookies = uio->uio_resid / 16 + 1;
964                 if (ncookies > 1024)
965                         ncookies = 1024;
966                 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
967                 cookie_index = 0;
968         } else {
969                 ncookies = -1;
970                 cookies = NULL;
971                 cookie_index = 0;
972         }
973
974         /*
975          * Handle artificial entries
976          */
977         error = 0;
978         if (saveoff == 0) {
979                 r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, ".");
980                 if (r)
981                         goto done;
982                 if (cookies)
983                         cookies[cookie_index] = saveoff;
984                 ++saveoff;
985                 ++cookie_index;
986                 if (cookie_index == ncookies)
987                         goto done;
988         }
989         if (saveoff == 1) {
990                 if (ip->ino_data.parent_obj_id) {
991                         r = vop_write_dirent(&error, uio,
992                                              ip->ino_data.parent_obj_id,
993                                              DT_DIR, 2, "..");
994                 } else {
995                         r = vop_write_dirent(&error, uio,
996                                              ip->obj_id, DT_DIR, 2, "..");
997                 }
998                 if (r)
999                         goto done;
1000                 if (cookies)
1001                         cookies[cookie_index] = saveoff;
1002                 ++saveoff;
1003                 ++cookie_index;
1004                 if (cookie_index == ncookies)
1005                         goto done;
1006         }
1007
1008         /*
1009          * Key range (begin and end inclusive) to scan.  Directory keys
1010          * directly translate to a 64 bit 'seek' position.
1011          */
1012         hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp);
1013         cursor.key_beg.obj_id = ip->obj_id;
1014         cursor.key_beg.create_tid = 0;
1015         cursor.key_beg.delete_tid = 0;
1016         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1017         cursor.key_beg.obj_type = 0;
1018         cursor.key_beg.key = saveoff;
1019
1020         cursor.key_end = cursor.key_beg;
1021         cursor.key_end.key = HAMMER_MAX_KEY;
1022         cursor.asof = ip->obj_asof;
1023         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
1024
1025         error = hammer_ip_first(&cursor, ip);
1026
1027         while (error == 0) {
1028                 error = hammer_ip_resolve_data(&cursor);
1029                 if (error)
1030                         break;
1031                 rec = cursor.record;
1032                 base = &rec->base.base;
1033                 saveoff = base->key;
1034
1035                 if (base->obj_id != ip->obj_id)
1036                         panic("readdir: bad record at %p", cursor.node);
1037
1038                 r = vop_write_dirent(
1039                              &error, uio, rec->entry.obj_id,
1040                              hammer_get_dtype(rec->entry.base.base.obj_type),
1041                              rec->entry.base.data_len,
1042                              (void *)cursor.data);
1043                 if (r)
1044                         break;
1045                 ++saveoff;
1046                 if (cookies)
1047                         cookies[cookie_index] = base->key;
1048                 ++cookie_index;
1049                 if (cookie_index == ncookies)
1050                         break;
1051                 error = hammer_ip_next(&cursor);
1052         }
1053         hammer_done_cursor(&cursor);
1054
1055 done:
1056         if (ap->a_eofflag)
1057                 *ap->a_eofflag = (error == ENOENT);
1058         uio->uio_offset = saveoff;
1059         if (error && cookie_index == 0) {
1060                 if (error == ENOENT)
1061                         error = 0;
1062                 if (cookies) {
1063                         kfree(cookies, M_TEMP);
1064                         *ap->a_ncookies = 0;
1065                         *ap->a_cookies = NULL;
1066                 }
1067         } else {
1068                 if (error == ENOENT)
1069                         error = 0;
1070                 if (cookies) {
1071                         *ap->a_ncookies = cookie_index;
1072                         *ap->a_cookies = cookies;
1073                 }
1074         }
1075         return(error);
1076 }
1077
1078 /*
1079  * hammer_vop_readlink { vp, uio, cred }
1080  */
1081 static
1082 int
1083 hammer_vop_readlink(struct vop_readlink_args *ap)
1084 {
1085         struct hammer_cursor cursor;
1086         struct hammer_inode *ip;
1087         int error;
1088
1089         ip = VTOI(ap->a_vp);
1090         hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp);
1091
1092         /*
1093          * Look up the symlink's FIX record.  Its data payload holds the
1094          * link target, which is copied directly to the caller's uio.
1095          */
1096         cursor.key_beg.obj_id = ip->obj_id;
1097         cursor.key_beg.create_tid = 0;
1098         cursor.key_beg.delete_tid = 0;
1099         cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX;
1100         cursor.key_beg.obj_type = 0;
1101         cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK;
1102         cursor.asof = ip->obj_asof;
1103         cursor.flags |= HAMMER_CURSOR_ASOF;
1104
1105         error = hammer_ip_lookup(&cursor, ip);
1106         if (error == 0) {
1107                 error = hammer_ip_resolve_data(&cursor);
1108                 if (error == 0) {
1109                         error = uiomove((char *)cursor.data,
1110                                         cursor.record->generic.base.data_len,
1111                                         ap->a_uio);
1112                 }
1113         }
1114         hammer_done_cursor(&cursor);
1115         return(error);
1116 }
1117
1118 /*
1119  * hammer_vop_nremove { nch, dvp, cred }
1120  */
1121 static
1122 int
1123 hammer_vop_nremove(struct vop_nremove_args *ap)
1124 {
1125         return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0));
1126 }
1127
1128 /*
1129  * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1130  */
1131 static
1132 int
1133 hammer_vop_nrename(struct vop_nrename_args *ap)
1134 {
1135         struct hammer_transaction trans;
1136         struct namecache *fncp;
1137         struct namecache *tncp;
1138         struct hammer_inode *fdip;
1139         struct hammer_inode *tdip;
1140         struct hammer_inode *ip;
1141         struct hammer_cursor cursor;
1142         union hammer_record_ondisk *rec;
1143         int64_t namekey;
1144         int error;
1145
1146         fdip = VTOI(ap->a_fdvp);
1147         tdip = VTOI(ap->a_tdvp);
1148         fncp = ap->a_fnch->ncp;
1149         tncp = ap->a_tnch->ncp;
1150         ip = VTOI(fncp->nc_vp);
1151         KKASSERT(ip != NULL);
1152
1153         if (fdip->flags & HAMMER_INODE_RO)
1154                 return (EROFS);
1155         if (tdip->flags & HAMMER_INODE_RO)
1156                 return (EROFS);
1157         if (ip->flags & HAMMER_INODE_RO)
1158                 return (EROFS);
1159
1160         hammer_start_transaction(&trans, fdip->hmp);
1161
1162         /*
1163          * Remove tncp from the target directory and then link ip as
1164          * tncp. XXX pass trans to dounlink
1165          */
1166         error = hammer_dounlink(ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
1167         if (error == 0 || error == ENOENT)
1168                 error = hammer_ip_add_directory(&trans, tdip, tncp, ip);
1169         if (error)
1170                 goto failed; /* XXX */
1171
1172         /*
1173          * Locate the record in the originating directory and remove it.
1174          *
1175          * Calculate the namekey and setup the key range for the scan.  This
1176          * works kinda like a chained hash table where the lower 32 bits
1177          * of the namekey synthesize the chain.
1178          *
1179          * The key range is inclusive of both key_beg and key_end.
1180          */
1181         namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
1182 retry:
1183         hammer_init_cursor_hmp(&cursor, &fdip->cache[0], fdip->hmp);
1184         cursor.key_beg.obj_id = fdip->obj_id;
1185         cursor.key_beg.key = namekey;
1186         cursor.key_beg.create_tid = 0;
1187         cursor.key_beg.delete_tid = 0;
1188         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1189         cursor.key_beg.obj_type = 0;
1190
1191         cursor.key_end = cursor.key_beg;
1192         cursor.key_end.key |= 0xFFFFFFFFULL;
1193         cursor.asof = fdip->obj_asof;
1194         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
1195
1196         /*
1197          * Scan all matching records (the chain), locate the one matching
1198          * the requested path component.
1199          *
1200          * The hammer_ip_*() functions merge in-memory records with on-disk
1201          * records for the purposes of the search.
1202          */
1203         error = hammer_ip_first(&cursor, fdip);
1204         while (error == 0) {
1205                 if (hammer_ip_resolve_data(&cursor) != 0)
1206                         break;
1207                 rec = cursor.record;
1208                 if (fncp->nc_nlen == rec->entry.base.data_len &&
1209                     bcmp(fncp->nc_name, cursor.data, fncp->nc_nlen) == 0) {
1210                         break;
1211                 }
1212                 error = hammer_ip_next(&cursor);
1213         }
1214
1215         /*
1216          * If all is ok we have to get the inode so we can adjust nlinks.
1217          *
1218          * WARNING: hammer_ip_del_directory() may have to terminate the
1219          * cursor to avoid a recursion.  It's ok to call hammer_done_cursor()
1220          * twice.
1221          */
1222         if (error == 0)
1223                 error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);
1224         hammer_done_cursor(&cursor);
1225         if (error == 0)
1226                 cache_rename(ap->a_fnch, ap->a_tnch);
1227         if (error == EDEADLK)
1228                 goto retry;
1229 failed:
1230         if (error == 0) {
1231                 hammer_commit_transaction(&trans);
1232         } else {
1233                 hammer_abort_transaction(&trans);
1234         }
1235         return (error);
1236 }
1237
1238 /*
1239  * hammer_vop_nrmdir { nch, dvp, cred }
1240  */
1241 static
1242 int
1243 hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
1244 {
1245         return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0));
1246 }
1247
1248 /*
1249  * hammer_vop_setattr { vp, vap, cred }
1250  */
1251 static
1252 int
1253 hammer_vop_setattr(struct vop_setattr_args *ap)
1254 {
1255         struct hammer_transaction trans;
1256         struct hammer_cursor *spike = NULL;
1257         struct vattr *vap;
1258         struct hammer_inode *ip;
1259         int modflags;
1260         int error;
1261         int truncating;
1262         int64_t aligned_size;
1263         u_int32_t flags;
1264         uuid_t uuid;
1265
1266         vap = ap->a_vap;
1267         ip = ap->a_vp->v_data;
1268         modflags = 0;
1269
1270         if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
1271                 return(EROFS);
1272         if (ip->flags & HAMMER_INODE_RO)
1273                 return (EROFS);
1274
1275         hammer_start_transaction(&trans, ip->hmp);
1276         error = 0;
1277
1278         if (vap->va_flags != VNOVAL) {
1279                 flags = ip->ino_data.uflags;
1280                 error = vop_helper_setattr_flags(&flags, vap->va_flags,
1281                                          hammer_to_unix_xid(&ip->ino_data.uid),
1282                                          ap->a_cred);
1283                 if (error == 0) {
1284                         if (ip->ino_data.uflags != flags) {
1285                                 ip->ino_data.uflags = flags;
1286                                 modflags |= HAMMER_INODE_DDIRTY;
1287                         }
1288                         if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
1289                                 error = 0;
1290                                 goto done;
1291                         }
1292                 }
1293                 goto done;
1294         }
1295         if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
1296                 error = EPERM;
1297                 goto done;
1298         }
1299         if (vap->va_uid != (uid_t)VNOVAL) {
1300                 hammer_guid_to_uuid(&uuid, vap->va_uid);
1301                 if (bcmp(&uuid, &ip->ino_data.uid, sizeof(uuid)) != 0) {
1302                         ip->ino_data.uid = uuid;
1303                         modflags |= HAMMER_INODE_DDIRTY;
1304                 }
1305         }
1306         if (vap->va_gid != (uid_t)VNOVAL) {
1307                 hammer_guid_to_uuid(&uuid, vap->va_gid);
1308                 if (bcmp(&uuid, &ip->ino_data.gid, sizeof(uuid)) != 0) {
1309                         ip->ino_data.gid = uuid;
1310                         modflags |= HAMMER_INODE_DDIRTY;
1311                 }
1312         }
1313         while (vap->va_size != VNOVAL && ip->ino_rec.ino_size != vap->va_size) {
1314                 switch(ap->a_vp->v_type) {
1315                 case VREG:
1316                         if (vap->va_size == ip->ino_rec.ino_size)
1317                                 break;
1318                         if (vap->va_size < ip->ino_rec.ino_size) {
1319                                 vtruncbuf(ap->a_vp, vap->va_size,
1320                                           HAMMER_BUFSIZE);
1321                                 truncating = 1;
1322                         } else {
1323                                 vnode_pager_setsize(ap->a_vp, vap->va_size);
1324                                 truncating = 0;
1325                         }
1326                         ip->ino_rec.ino_size = vap->va_size;
1327                         modflags |= HAMMER_INODE_RDIRTY;
1328                         aligned_size = (vap->va_size + HAMMER_BUFMASK) &
1329                                         ~(int64_t)HAMMER_BUFMASK;
1330
1331                         if (truncating) {
1332                                 error = hammer_ip_delete_range(&trans, ip,
1333                                                     aligned_size,
1334                                                     0x7FFFFFFFFFFFFFFFLL,
1335                                                     &spike);
1336                         }
1337                         /*
1338                          * If truncating we have to clean out a portion of
1339                          * the last block on-disk.
1340                          */
1341                         if (truncating && error == 0 &&
1342                             vap->va_size < aligned_size) {
1343                                 struct buf *bp;
1344                                 int offset;
1345
1346                                 offset = vap->va_size & HAMMER_BUFMASK;
1347                                 error = bread(ap->a_vp,
1348                                               aligned_size - HAMMER_BUFSIZE,
1349                                               HAMMER_BUFSIZE, &bp);
1350                                 if (error == 0) {
1351                                         bzero(bp->b_data + offset,
1352                                               HAMMER_BUFSIZE - offset);
1353                                         bdwrite(bp);
1354                                 } else {
1355                                         brelse(bp);
1356                                 }
1357                         }
1358                         break;
1359                 case VDATABASE:
1360                         error = hammer_ip_delete_range(&trans, ip,
1361                                                     vap->va_size,
1362                                                     0x7FFFFFFFFFFFFFFFLL,
1363                                                     &spike);
1364                         ip->ino_rec.ino_size = vap->va_size;
1365                         modflags |= HAMMER_INODE_RDIRTY;
1366                         break;
1367                 default:
1368                         error = EINVAL;
1369                         goto done;
1370                 }
1371                 if (error == ENOSPC) {
1372                         error = hammer_spike(&spike);
1373                         if (error == 0)
1374                                 continue;
1375                 }
1376                 KKASSERT(spike == NULL);
1377                 break;
1378         }
1379         if (vap->va_atime.tv_sec != VNOVAL) {
1380                 ip->ino_rec.ino_atime =
1381                         hammer_timespec_to_transid(&vap->va_atime);
1382                 modflags |= HAMMER_INODE_ITIMES;
1383         }
1384         if (vap->va_mtime.tv_sec != VNOVAL) {
1385                 ip->ino_rec.ino_mtime =
1386                         hammer_timespec_to_transid(&vap->va_mtime);
1387                 modflags |= HAMMER_INODE_ITIMES;
1388         }
1389         if (vap->va_mode != (mode_t)VNOVAL) {
1390                 if (ip->ino_data.mode != vap->va_mode) {
1391                         ip->ino_data.mode = vap->va_mode;
1392                         modflags |= HAMMER_INODE_DDIRTY;
1393                 }
1394         }
1395 done:
1396         if (error) {
1397                 hammer_abort_transaction(&trans);
1398         } else {
1399                 hammer_modify_inode(&trans, ip, modflags);
1400                 hammer_commit_transaction(&trans);
1401         }
1402         return (error);
1403 }
1404
1405 /*
1406  * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1407  */
1408 static
1409 int
1410 hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
1411 {
1412         struct hammer_transaction trans;
1413         struct hammer_inode *dip;
1414         struct hammer_inode *nip;
1415         struct nchandle *nch;
1416         hammer_record_t record;
1417         int error;
1418         int bytes;
1419
1420         ap->a_vap->va_type = VLNK;
1421
1422         nch = ap->a_nch;
1423         dip = VTOI(ap->a_dvp);
1424
1425         if (dip->flags & HAMMER_INODE_RO)
1426                 return (EROFS);
1427
1428         /*
1429          * Create a transaction to cover the operations we perform.
1430          */
1431         hammer_start_transaction(&trans, dip->hmp);
1432
1433         /*
1434          * Create a new filesystem object of the requested type.  The
1435          * returned inode will be referenced but not locked.
1436          */
1437
1438         error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
1439         if (error) {
1440                 hammer_abort_transaction(&trans);
1441                 *ap->a_vpp = NULL;
1442                 return (error);
1443         }
1444
1445         /*
1446          * Add the new filesystem object to the directory.  This will also
1447          * bump the inode's link count.
1448          */
1449         error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
1450
1451         /*
1452          * Add a record representing the symlink.  The symlink is stored
1453          * as pure data, not a string, and is not \0 terminated.
1454          */
1455         if (error == 0) {
1456                 record = hammer_alloc_mem_record(nip);
1457                 bytes = strlen(ap->a_target);
1458
1459                 record->rec.generic.base.base.key = HAMMER_FIXKEY_SYMLINK;
1460                 record->rec.generic.base.base.rec_type = HAMMER_RECTYPE_FIX;
1461                 record->rec.generic.base.data_len = bytes;
1462                 if (bytes <= sizeof(record->rec.generic.filler)) {
1463                         record->data = (void *)record->rec.generic.filler;
1464                         bcopy(ap->a_target, record->data, bytes);
1465                 } else {
1466                         record->data = (void *)ap->a_target;
1467                         /* will be reallocated by routine below */
1468                 }
1469                 error = hammer_ip_add_record(&trans, record);
1470         }
1471
1472         /*
1473          * Finish up.
1474          */
1475         if (error) {
1476                 hammer_rel_inode(nip, 0);
1477                 hammer_abort_transaction(&trans);
1478                 *ap->a_vpp = NULL;
1479         } else {
1480                 hammer_commit_transaction(&trans);
1481                 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
1482                 hammer_rel_inode(nip, 0);
1483                 if (error == 0) {
1484                         cache_setunresolved(ap->a_nch);
1485                         cache_setvp(ap->a_nch, *ap->a_vpp);
1486                 }
1487         }
1488         return (error);
1489 }
1490
1491 /*
1492  * hammer_vop_nwhiteout { nch, dvp, cred, flags }
1493  */
1494 static
1495 int
1496 hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
1497 {
1498         return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, ap->a_flags));
1499 }
1500
1501 /*
1502  * hammer_vop_ioctl { vp, command, data, fflag, cred }
1503  */
1504 static
1505 int
1506 hammer_vop_ioctl(struct vop_ioctl_args *ap)
1507 {
1508         struct hammer_inode *ip = ap->a_vp->v_data;
1509
1510         return(hammer_ioctl(ip, ap->a_command, ap->a_data,
1511                             ap->a_fflag, ap->a_cred));
1512 }
1513
1514 /*
1515  * hammer_vop_strategy { vp, bio }
1516  *
1517  * Strategy call, used for regular file read & write only.  Note that the
1518  * bp may represent a cluster.
1519  *
1520  * To simplify operation and allow better optimizations in the future,
1521  * this code does not make any assumptions with regard to buffer alignment
1522  * or size.
1523  */
1524 static
1525 int
1526 hammer_vop_strategy(struct vop_strategy_args *ap)
1527 {
1528         struct buf *bp;
1529         int error;
1530
1531         bp = ap->a_bio->bio_buf;
1532
1533         switch(bp->b_cmd) {
1534         case BUF_CMD_READ:
1535                 error = hammer_vop_strategy_read(ap);
1536                 break;
1537         case BUF_CMD_WRITE:
1538                 error = hammer_vop_strategy_write(ap);
1539                 break;
1540         default:
1541                 error = EINVAL;
1542                 break;
1543         }
1544         bp->b_error = error;
1545         if (error)
1546                 bp->b_flags |= B_ERROR;
1547         biodone(ap->a_bio);
1548         return (error);
1549 }
1550
1551 /*
1552  * Read from a regular file.  Iterate the related records and fill in the
1553  * BIO/BUF.  Gaps are zero-filled.
1554  *
1555  * The support code in hammer_object.c should be used to deal with mixed
1556  * in-memory and on-disk records.
1557  *
1558  * XXX atime update
1559  */
1560 static
1561 int
1562 hammer_vop_strategy_read(struct vop_strategy_args *ap)
1563 {
1564         struct hammer_inode *ip = ap->a_vp->v_data;
1565         struct hammer_cursor cursor;
1566         hammer_record_ondisk_t rec;
1567         hammer_base_elm_t base;
1568         struct bio *bio;
1569         struct buf *bp;
1570         int64_t rec_offset;
1571         int64_t ran_end;
1572         int64_t tmp64;
1573         int error;
1574         int boff;
1575         int roff;
1576         int n;
1577
1578         bio = ap->a_bio;
1579         bp = bio->bio_buf;
1580
1581         hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp);
1582
1583         /*
1584          * Key range (begin and end inclusive) to scan.  Note that the keys
1585          * stored in the actual records represent BASE+LEN, not BASE.  The
1586          * first record containing bio_offset will have a key > bio_offset.
1587          */
1588         cursor.key_beg.obj_id = ip->obj_id;
1589         cursor.key_beg.create_tid = 0;
1590         cursor.key_beg.delete_tid = 0;
1591         cursor.key_beg.obj_type = 0;
1592         cursor.key_beg.key = bio->bio_offset + 1;
1593         cursor.asof = ip->obj_asof;
1594         cursor.flags |= HAMMER_CURSOR_ASOF;
1595
1596         cursor.key_end = cursor.key_beg;
1597         if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
1598                 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
1599                 cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
1600                 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
1601         } else {
1602                 ran_end = bio->bio_offset + bp->b_bufsize;
1603                 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
1604                 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
1605                 tmp64 = ran_end + MAXPHYS + 1;  /* work-around GCC-4 bug */
1606                 if (tmp64 < ran_end)
1607                         cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
1608                 else
1609                         cursor.key_end.key = ran_end + MAXPHYS + 1;
1610         }
1611         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
1612
1613         error = hammer_ip_first(&cursor, ip);
1614         boff = 0;
1615
1616         while (error == 0) {
1617                 error = hammer_ip_resolve_data(&cursor);
1618                 if (error)
1619                         break;
1620                 rec = cursor.record;
1621                 base = &rec->base.base;
1622
1623                 rec_offset = base->key - rec->data.base.data_len;
1624
1625                 /*
1626                  * Calculate the gap, if any, and zero-fill it.
1627                  */
1628                 n = (int)(rec_offset - (bio->bio_offset + boff));
1629                 if (n > 0) {
1630                         if (n > bp->b_bufsize - boff)
1631                                 n = bp->b_bufsize - boff;
1632                         bzero((char *)bp->b_data + boff, n);
1633                         boff += n;
1634                         n = 0;
1635                 }
1636
1637                 /*
1638                  * Calculate the data offset in the record and the number
1639                  * of bytes we can copy.
1640                  *
1641                  * Note there is a degenerate case here where boff may
1642                  * already be at bp->b_bufsize.
1643                  */
1644                 roff = -n;
1645                 n = rec->data.base.data_len - roff;
1646                 KKASSERT(n > 0);
1647                 if (n > bp->b_bufsize - boff)
1648                         n = bp->b_bufsize - boff;
1649                 bcopy((char *)cursor.data + roff, (char *)bp->b_data + boff, n);
1650                 boff += n;
1651                 if (boff == bp->b_bufsize)
1652                         break;
1653                 error = hammer_ip_next(&cursor);
1654         }
1655         hammer_done_cursor(&cursor);
1656
1657         /*
1658          * There may have been a gap after the last record.
1659          */
1660         if (error == ENOENT)
1661                 error = 0;
1662         if (error == 0 && boff != bp->b_bufsize) {
1663                 KKASSERT(boff < bp->b_bufsize);
1664                 bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
1665                 /* boff = bp->b_bufsize; */
1666         }
1667         bp->b_resid = 0;
1668         return(error);
1669 }
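
/*
 * Illustrative sketch (not compiled): a worked example of the offset
 * arithmetic above.  Record keys store BASE+LEN rather than BASE, so a
 * record's data begins at key - data_len and any gap in front of it is
 * zero-filled.  The helper name and the numbers below are hypothetical.
 */
#if 0
static
void
strategy_read_offsets_sketch(void)
{
	int64_t bio_offset = 0;		/* buffer maps file offset 0 */
	int	b_bufsize = 16384;	/* 16K buffer */
	int	boff = 0;		/* bytes of the buffer filled so far */
	int64_t key = 12288;		/* record key = BASE + LEN */
	int	data_len = 4096;	/* record holds 4K of data */
	int64_t rec_offset;
	int	n, roff;

	rec_offset = key - data_len;			/* 8192, record BASE */
	n = (int)(rec_offset - (bio_offset + boff));	/* 8192-byte gap */
	if (n > 0) {
		if (n > b_bufsize - boff)
			n = b_bufsize - boff;	/* clip the gap to the buffer */
		/* bzero(bp->b_data + boff, n) in the real code */
		boff += n;			/* boff = 8192 */
		n = 0;
	}
	roff = -n;				/* 0, record does not overlap left */
	n = data_len - roff;			/* copy all 4096 record bytes */
	if (n > b_bufsize - boff)
		n = b_bufsize - boff;
	/* bcopy(data + roff, bp->b_data + boff, n); boff = 12288, loop on */
}
#endif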
1670
1671 /*
1672  * Write to a regular file.  Iterate the related records and mark for
1673  * deletion.  If existing edge records (left and right side) overlap our
1674  * write they have to be marked deleted and new records created, usually
1675  * referencing a portion of the original data.  Then add a record to
1676  * represent the buffer.
1677  *
1678  * The support code in hammer_object.c should be used to deal with mixed
1679  * in-memory and on-disk records.
1680  */
1681 static
1682 int
1683 hammer_vop_strategy_write(struct vop_strategy_args *ap)
1684 {
1685         struct hammer_transaction trans;
1686         struct hammer_cursor *spike = NULL;
1687         hammer_inode_t ip;
1688         struct bio *bio;
1689         struct buf *bp;
1690         int error;
1691
1692         bio = ap->a_bio;
1693         bp = bio->bio_buf;
1694         ip = ap->a_vp->v_data;
1695
1696         if (ip->flags & HAMMER_INODE_RO)
1697                 return (EROFS);
1698
1699         /*
1700          * Start a transaction using the TID stored with the bp.
1701          */
1702         KKASSERT(bp->b_tid != 0);
1703         hammer_start_transaction_tid(&trans, ip->hmp, bp->b_tid);
1704
1705 retry:
1706         /*
1707          * Delete any records overlapping our range.  This function will
1708          * (eventually) properly truncate partial overlaps.
1709          */
1710         if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
1711                 error = hammer_ip_delete_range(&trans, ip, bio->bio_offset,
1712                                                bio->bio_offset, &spike);
1713         } else {
1714                 error = hammer_ip_delete_range(&trans, ip, bio->bio_offset,
1715                                                bio->bio_offset +
1716                                                 bp->b_bufsize - 1,
1717                                                &spike);
1718         }
1719
1720         /*
1721          * Add a single record to cover the write
1722          */
1723         if (error == 0) {
1724                 error = hammer_ip_sync_data(&trans, ip, bio->bio_offset,
1725                                             bp->b_data, bp->b_bufsize,
1726                                             &spike);
1727         }
1728
1729         /*
1730          * If we ran out of space, the spike structure will be filled in
1731          * and we must call hammer_spike() with it, then retry.
1732          */
1733         if (error == ENOSPC) {
1734                 error = hammer_spike(&spike);
1735                 if (error == 0)
1736                         goto retry;
1737         }
1738         KKASSERT(spike == NULL);
1739
1740         /*
1741          * If an error occurred, abort the transaction
1742          */
1743         if (error) {
1744                 /* XXX undo deletion */
1745                 hammer_abort_transaction(&trans);
1746                 bp->b_resid = bp->b_bufsize;
1747         } else {
1748                 hammer_commit_transaction(&trans);
1749                 bp->b_resid = 0;
1750                 bp->b_tid = 0;
1751         }
1752         return(error);
1753 }
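
/*
 * Illustrative sketch (not compiled): the shape of the ENOSPC/spike retry
 * used above.  When the B-Tree has no local room the spike cursor is
 * filled in, hammer_spike() makes space, and the whole delete-range plus
 * sync-data sequence is rerun.  do_range_operation() is a hypothetical
 * stand-in for that sequence, not a real API.
 */
#if 0
static
int
strategy_write_retry_sketch(void)
{
	struct hammer_cursor *spike = NULL;
	int error;

	for (;;) {
		error = do_range_operation(&spike);	/* hypothetical */
		if (error != ENOSPC)
			break;
		error = hammer_spike(&spike);		/* make room, retry */
		if (error)
			break;				/* could not grow */
	}
	KKASSERT(spike == NULL);			/* spike was consumed */
	return (error);
}
#endif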
1754
1755 /*
1756  * dounlink - disconnect a directory entry
1757  *
1758  * XXX whiteout support not really in yet
1759  */
1760 static int
1761 hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred,
1762                 int flags)
1763 {
1764         struct hammer_transaction trans;
1765         struct namecache *ncp;
1766         hammer_inode_t dip;
1767         hammer_inode_t ip;
1768         hammer_record_ondisk_t rec;
1769         struct hammer_cursor cursor;
1770         int64_t namekey;
1771         int error;
1772
1773         /*
1774          * Calculate the namekey and set up the key range for the scan.  This
1775          * works somewhat like a chained hash table where the lower 32 bits
1776          * of the namekey synthesize the chain.
1777          *
1778          * The key range is inclusive of both key_beg and key_end.
1779          */
1780         dip = VTOI(dvp);
1781         ncp = nch->ncp;
1782
1783         if (dip->flags & HAMMER_INODE_RO)
1784                 return (EROFS);
1785
1786         hammer_start_transaction(&trans, dip->hmp);
1787
1788         namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
1789 retry:
1790         hammer_init_cursor_hmp(&cursor, &dip->cache[0], dip->hmp);
1791         cursor.key_beg.obj_id = dip->obj_id;
1792         cursor.key_beg.key = namekey;
1793         cursor.key_beg.create_tid = 0;
1794         cursor.key_beg.delete_tid = 0;
1795         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1796         cursor.key_beg.obj_type = 0;
1797
1798         cursor.key_end = cursor.key_beg;
1799         cursor.key_end.key |= 0xFFFFFFFFULL;
1800         cursor.asof = dip->obj_asof;
1801         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
1802
1803         /*
1804          * Scan all matching records (the chain), locate the one matching
1805          * the requested path component.  When the scan terminates the
1806          * error variable holds the result code and could be 0, ENOENT, or
1807          * something else.
1808          *
1809          * The hammer_ip_*() functions merge in-memory records with on-disk
1810          * records for the purposes of the search.
1811          */
1812         error = hammer_ip_first(&cursor, dip);
1813         while (error == 0) {
1814                 error = hammer_ip_resolve_data(&cursor);
1815                 if (error)
1816                         break;
1817                 rec = cursor.record;
1818                 if (ncp->nc_nlen == rec->entry.base.data_len &&
1819                     bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) {
1820                         break;
1821                 }
1822                 error = hammer_ip_next(&cursor);
1823         }
1824
1825         /*
1826          * If all is ok we have to get the inode so we can adjust nlinks.
1827          *
1828          * If the target is a directory, it must be empty.
1829          */
1830         if (error == 0) {
1831                 ip = hammer_get_inode(dip->hmp, &dip->cache[1],
1832                                       rec->entry.obj_id,
1833                                       dip->hmp->asof, 0, &error);
1834                 if (error == ENOENT) {
1835                         kprintf("obj_id %016llx\n", rec->entry.obj_id);
1836                         Debugger("ENOENT unlinking object that should exist, cont to sync");
1837                         hammer_sync_hmp(dip->hmp, MNT_NOWAIT);
1838                         Debugger("ENOENT - sync done");
1839                 }
1840                 if (error == 0 && ip->ino_rec.base.base.obj_type ==
1841                                   HAMMER_OBJTYPE_DIRECTORY) {
1842                         error = hammer_ip_check_directory_empty(&trans, ip);
1843                 }
1844                 /*
1845                  * WARNING: hammer_ip_del_directory() may have to terminate
1846                  * the cursor to avoid a lock recursion.  It's ok to call
1847                  * hammer_done_cursor() twice.
1848                  */
1849                 if (error == 0)
1850                         error = hammer_ip_del_directory(&trans, &cursor, dip, ip);
1851                 if (error == 0) {
1852                         cache_setunresolved(nch);
1853                         cache_setvp(nch, NULL);
1854                         /* XXX locking */
1855                         if (ip->vp)
1856                                 cache_inval_vp(ip->vp, CINV_DESTROY);
1857                 }
1858                 hammer_rel_inode(ip, 0);
1859         }
1860         hammer_done_cursor(&cursor);
1861         if (error == EDEADLK)
1862                 goto retry;
1863
1864         if (error == 0)
1865                 hammer_commit_transaction(&trans);
1866         else
1867                 hammer_abort_transaction(&trans);
1868         return (error);
1869 }
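
/*
 * Illustrative sketch (not compiled): the directory key range used by the
 * unlink scan above.  key_beg takes the namekey as returned by
 * hammer_directory_namekey() and key_end saturates the low 32 bits, so
 * the inclusive scan visits every entry whose name hashed onto the same
 * chain; bcmp() against the stored name then picks the exact entry.  The
 * helper itself is hypothetical.
 */
#if 0
static
void
dounlink_namekey_sketch(struct namecache *ncp)
{
	int64_t namekey;
	int64_t chain_beg;
	int64_t chain_end;

	namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
	chain_beg = namekey;			/* first key in the chain */
	chain_end = namekey | 0xFFFFFFFFULL;	/* last key in the chain  */
	/*
	 * The scan above iterates [chain_beg, chain_end] inclusive and
	 * bcmp()s each entry's stored name against ncp->nc_name to find
	 * the requested path component.
	 */
}
#endif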
1870
1871 /************************************************************************
1872  *                          FIFO AND SPECFS OPS                         *
1873  ************************************************************************
1874  *
1875  */
1876
1877 static int
1878 hammer_vop_fifoclose (struct vop_close_args *ap)
1879 {
1880         /* XXX update itimes */
1881         return (VOCALL(&fifo_vnode_vops, &ap->a_head));
1882 }
1883
1884 static int
1885 hammer_vop_fiforead (struct vop_read_args *ap)
1886 {
1887         int error;
1888
1889         error = VOCALL(&fifo_vnode_vops, &ap->a_head);
1890         /* XXX update access time */
1891         return (error);
1892 }
1893
1894 static int
1895 hammer_vop_fifowrite (struct vop_write_args *ap)
1896 {
1897         int error;
1898
1899         error = VOCALL(&fifo_vnode_vops, &ap->a_head);
1900         /* XXX update access time */
1901         return (error);
1902 }
1903
1904 static int
1905 hammer_vop_specclose (struct vop_close_args *ap)
1906 {
1907         /* XXX update itimes */
1908         return (VOCALL(&spec_vnode_vops, &ap->a_head));
1909 }
1910
1911 static int
1912 hammer_vop_specread (struct vop_read_args *ap)
1913 {
1914         /* XXX update access time */
1915         return (VOCALL(&spec_vnode_vops, &ap->a_head));
1916 }
1917
1918 static int
1919 hammer_vop_specwrite (struct vop_write_args *ap)
1920 {
1921         /* XXX update last change time */
1922         return (VOCALL(&spec_vnode_vops, &ap->a_head));
1923 }
1924