3eee9cdeeaa831f6e18e83b1d091ea454b6bfaf5
[dragonfly.git] / sys / vfs / hammer / hammer_vnops.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.11 2007/12/30 00:47:22 dillon Exp $
35  */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/namecache.h>
42 #include <sys/vnode.h>
43 #include <sys/lockf.h>
44 #include <sys/event.h>
45 #include <sys/stat.h>
46 #include <vm/vm_extern.h>
47 #include <vfs/fifofs/fifo.h>
48 #include "hammer.h"
49
/*
 * HAMMER VNOPS
 *
 * Forward declarations for the vnode operations referenced by the
 * vop_ops tables below.
 */
/*static int hammer_vop_vnoperate(struct vop_generic_args *ap);*/

/* Operations installed in hammer_vnode_vops (some are shared). */
static int hammer_vop_fsync(struct vop_fsync_args *ap);
static int hammer_vop_read(struct vop_read_args *ap);
static int hammer_vop_write(struct vop_write_args *ap);
static int hammer_vop_access(struct vop_access_args *ap);
static int hammer_vop_advlock(struct vop_advlock_args *ap);
static int hammer_vop_close(struct vop_close_args *ap);
static int hammer_vop_ncreate(struct vop_ncreate_args *ap);
static int hammer_vop_getattr(struct vop_getattr_args *ap);
static int hammer_vop_nresolve(struct vop_nresolve_args *ap);
static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap);
static int hammer_vop_nlink(struct vop_nlink_args *ap);
static int hammer_vop_nmkdir(struct vop_nmkdir_args *ap);
static int hammer_vop_nmknod(struct vop_nmknod_args *ap);
static int hammer_vop_open(struct vop_open_args *ap);
static int hammer_vop_pathconf(struct vop_pathconf_args *ap);
static int hammer_vop_print(struct vop_print_args *ap);
static int hammer_vop_readdir(struct vop_readdir_args *ap);
static int hammer_vop_readlink(struct vop_readlink_args *ap);
static int hammer_vop_nremove(struct vop_nremove_args *ap);
static int hammer_vop_nrename(struct vop_nrename_args *ap);
static int hammer_vop_nrmdir(struct vop_nrmdir_args *ap);
static int hammer_vop_setattr(struct vop_setattr_args *ap);
static int hammer_vop_strategy(struct vop_strategy_args *ap);
static int hammer_vop_nsymlink(struct vop_nsymlink_args *ap);
static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap);

/* Overrides installed in hammer_fifo_vops. */
static int hammer_vop_fifoclose(struct vop_close_args *ap);
static int hammer_vop_fiforead(struct vop_read_args *ap);
static int hammer_vop_fifowrite(struct vop_write_args *ap);

/* Overrides installed in hammer_spec_vops. */
static int hammer_vop_specclose(struct vop_close_args *ap);
static int hammer_vop_specread(struct vop_read_args *ap);
static int hammer_vop_specwrite(struct vop_write_args *ap);
87
88 struct vop_ops hammer_vnode_vops = {
89         .vop_default =          vop_defaultop,
90         .vop_fsync =            hammer_vop_fsync,
91         .vop_getpages =         vop_stdgetpages,
92         .vop_putpages =         vop_stdputpages,
93         .vop_read =             hammer_vop_read,
94         .vop_write =            hammer_vop_write,
95         .vop_access =           hammer_vop_access,
96         .vop_advlock =          hammer_vop_advlock,
97         .vop_close =            hammer_vop_close,
98         .vop_ncreate =          hammer_vop_ncreate,
99         .vop_getattr =          hammer_vop_getattr,
100         .vop_inactive =         hammer_vop_inactive,
101         .vop_reclaim =          hammer_vop_reclaim,
102         .vop_nresolve =         hammer_vop_nresolve,
103         .vop_nlookupdotdot =    hammer_vop_nlookupdotdot,
104         .vop_nlink =            hammer_vop_nlink,
105         .vop_nmkdir =           hammer_vop_nmkdir,
106         .vop_nmknod =           hammer_vop_nmknod,
107         .vop_open =             hammer_vop_open,
108         .vop_pathconf =         hammer_vop_pathconf,
109         .vop_print =            hammer_vop_print,
110         .vop_readdir =          hammer_vop_readdir,
111         .vop_readlink =         hammer_vop_readlink,
112         .vop_nremove =          hammer_vop_nremove,
113         .vop_nrename =          hammer_vop_nrename,
114         .vop_nrmdir =           hammer_vop_nrmdir,
115         .vop_setattr =          hammer_vop_setattr,
116         .vop_strategy =         hammer_vop_strategy,
117         .vop_nsymlink =         hammer_vop_nsymlink,
118         .vop_nwhiteout =        hammer_vop_nwhiteout
119 };
120
121 struct vop_ops hammer_spec_vops = {
122         .vop_default =          spec_vnoperate,
123         .vop_fsync =            hammer_vop_fsync,
124         .vop_read =             hammer_vop_specread,
125         .vop_write =            hammer_vop_specwrite,
126         .vop_access =           hammer_vop_access,
127         .vop_close =            hammer_vop_specclose,
128         .vop_getattr =          hammer_vop_getattr,
129         .vop_inactive =         hammer_vop_inactive,
130         .vop_reclaim =          hammer_vop_reclaim,
131         .vop_setattr =          hammer_vop_setattr
132 };
133
134 struct vop_ops hammer_fifo_vops = {
135         .vop_default =          fifo_vnoperate,
136         .vop_fsync =            hammer_vop_fsync,
137         .vop_read =             hammer_vop_fiforead,
138         .vop_write =            hammer_vop_fifowrite,
139         .vop_access =           hammer_vop_access,
140         .vop_close =            hammer_vop_fifoclose,
141         .vop_getattr =          hammer_vop_getattr,
142         .vop_inactive =         hammer_vop_inactive,
143         .vop_reclaim =          hammer_vop_reclaim,
144         .vop_setattr =          hammer_vop_setattr
145 };
146
/*
 * File-local helpers defined later in this file.
 */
static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
static int hammer_vop_strategy_write(struct vop_strategy_args *ap);
static int hammer_dounlink(struct nchandle *nch,
                           struct vnode *dvp,
                           struct ucred *cred,
                           int flags);
151
#if 0
/*
 * hammer_vop_vnoperate { generic args }
 *
 * Generic dispatch through hammer_vnode_vops.  Currently compiled out;
 * its prototype near the top of the file is commented out as well.
 * The parameter is named so the body compiles if this is ever enabled.
 */
static
int
hammer_vop_vnoperate(struct vop_generic_args *ap)
{
        return (VOCALL(&hammer_vnode_vops, ap));
}
#endif
160
161 /*
162  * hammer_vop_fsync { vp, waitfor }
163  */
164 static
165 int
166 hammer_vop_fsync(struct vop_fsync_args *ap)
167 {
168         hammer_inode_t ip;
169         int error;
170
171         ip = VTOI(ap->a_vp);
172         error = hammer_sync_inode(ip, ap->a_waitfor, 0);
173         return (error);
174 }
175
176 /*
177  * hammer_vop_read { vp, uio, ioflag, cred }
178  */
179 static
180 int
181 hammer_vop_read(struct vop_read_args *ap)
182 {
183         struct hammer_transaction trans;
184         hammer_inode_t ip;
185         off_t offset;
186         struct buf *bp;
187         struct uio *uio;
188         int error;
189         int n;
190         int seqcount;
191
192         if (ap->a_vp->v_type != VREG)
193                 return (EINVAL);
194         ip = VTOI(ap->a_vp);
195         error = 0;
196         seqcount = ap->a_ioflag >> 16;
197
198         hammer_start_transaction(&trans, ip->hmp);
199
200         /*
201          * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
202          */
203         uio = ap->a_uio;
204         while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_rec.ino_size) {
205                 offset = uio->uio_offset & HAMMER_BUFMASK;
206 #if 0
207                 error = cluster_read(ap->a_vp, ip->ino_rec.ino_size,
208                                      uio->uio_offset - offset, HAMMER_BUFSIZE,
209                                      MAXBSIZE, seqcount, &bp);
210 #endif
211                 error = bread(ap->a_vp, uio->uio_offset - offset,
212                               HAMMER_BUFSIZE, &bp);
213                 if (error) {
214                         brelse(bp);
215                         break;
216                 }
217                 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
218                 n = HAMMER_BUFSIZE - offset;
219                 if (n > uio->uio_resid)
220                         n = uio->uio_resid;
221                 if (n > ip->ino_rec.ino_size - uio->uio_offset)
222                         n = (int)(ip->ino_rec.ino_size - uio->uio_offset);
223                 error = uiomove((char *)bp->b_data + offset, n, uio);
224                 if (error) {
225                         bqrelse(bp);
226                         break;
227                 }
228                 ip->ino_rec.ino_atime = trans.tid;
229                 hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES);
230                 bqrelse(bp);
231         }
232         hammer_commit_transaction(&trans);
233         return (error);
234 }
235
236 /*
237  * hammer_vop_write { vp, uio, ioflag, cred }
238  */
239 static
240 int
241 hammer_vop_write(struct vop_write_args *ap)
242 {
243         struct hammer_transaction trans;
244         struct hammer_inode *ip;
245         struct uio *uio;
246         off_t offset;
247         struct buf *bp;
248         int error;
249         int n;
250         int flags;
251
252         if (ap->a_vp->v_type != VREG)
253                 return (EINVAL);
254         ip = VTOI(ap->a_vp);
255         error = 0;
256
257         /*
258          * Create a transaction to cover the operations we perform.
259          */
260         hammer_start_transaction(&trans, ip->hmp);
261         uio = ap->a_uio;
262
263         /*
264          * Check append mode
265          */
266         if (ap->a_ioflag & IO_APPEND)
267                 uio->uio_offset = ip->ino_rec.ino_size;
268
269         /*
270          * Check for illegal write offsets.  Valid range is 0...2^63-1
271          */
272         if (uio->uio_offset < 0 || uio->uio_offset + uio->uio_resid <= 0)
273                 return (EFBIG);
274
275         /*
276          * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
277          */
278         while (uio->uio_resid > 0) {
279                 offset = uio->uio_offset & HAMMER_BUFMASK;
280                 if (uio->uio_segflg == UIO_NOCOPY) {
281                         /*
282                          * Issuing a write with the same data backing the
283                          * buffer.  Instantiate the buffer to collect the
284                          * backing vm pages, then read-in any missing bits.
285                          *
286                          * This case is used by vop_stdputpages().
287                          */
288                         bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE,
289                                     0, 0);
290                         if ((bp->b_flags & B_CACHE) == 0) {
291                                 bqrelse(bp);
292                                 error = bread(ap->a_vp,
293                                               uio->uio_offset - offset,
294                                               HAMMER_BUFSIZE, &bp);
295                                 if (error) {
296                                         brelse(bp);
297                                         break;
298                                 }
299                         }
300                 } else if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
301                         /*
302                          * entirely overwrite the buffer
303                          */
304                         bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE,
305                                     0, 0);
306                 } else if (offset == 0 && uio->uio_offset >= ip->ino_rec.ino_size) {
307                         /*
308                          * XXX
309                          */
310                         bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE,
311                                     0, 0);
312                         vfs_bio_clrbuf(bp);
313                 } else {
314                         /*
315                          * Partial overwrite, read in any missing bits then
316                          * replace the portion being written.
317                          */
318                         error = bread(ap->a_vp, uio->uio_offset - offset,
319                                       HAMMER_BUFSIZE, &bp);
320                         if (error) {
321                                 brelse(bp);
322                                 break;
323                         }
324                 }
325                 n = HAMMER_BUFSIZE - offset;
326                 if (n > uio->uio_resid)
327                         n = uio->uio_resid;
328                 error = uiomove((char *)bp->b_data + offset, n, uio);
329                 if (error) {
330                         brelse(bp);
331                         break;
332                 }
333                 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
334                 if (ip->ino_rec.ino_size < uio->uio_offset) {
335                         ip->ino_rec.ino_size = uio->uio_offset;
336                         ip->ino_rec.ino_mtime = trans.tid;
337                         flags = HAMMER_INODE_RDIRTY | HAMMER_INODE_ITIMES |
338                                 HAMMER_INODE_TID;
339                         vnode_pager_setsize(ap->a_vp, ip->ino_rec.ino_size);
340                 } else {
341                         flags = HAMMER_INODE_TID;
342                 }
343                 hammer_modify_inode(&trans, ip, flags);
344                 if (ap->a_ioflag & IO_SYNC) {
345                         bwrite(bp);
346                 } else if (ap->a_ioflag & IO_DIRECT) {
347                         bawrite(bp);
348                 } else {
349                         bdwrite(bp);
350                 }
351         }
352         if (error)
353                 hammer_abort_transaction(&trans);
354         else
355                 hammer_commit_transaction(&trans);
356         return (error);
357 }
358
359 /*
360  * hammer_vop_access { vp, mode, cred }
361  */
362 static
363 int
364 hammer_vop_access(struct vop_access_args *ap)
365 {
366         struct hammer_inode *ip = VTOI(ap->a_vp);
367         uid_t uid;
368         gid_t gid;
369         int error;
370
371         uid = hammer_to_unix_xid(&ip->ino_data.uid);
372         gid = hammer_to_unix_xid(&ip->ino_data.gid);
373
374         error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
375                                   ip->ino_data.uflags);
376         return (error);
377 }
378
379 /*
380  * hammer_vop_advlock { vp, id, op, fl, flags }
381  */
382 static
383 int
384 hammer_vop_advlock(struct vop_advlock_args *ap)
385 {
386         struct hammer_inode *ip = VTOI(ap->a_vp);
387
388         return (lf_advlock(ap, &ip->advlock, ip->ino_rec.ino_size));
389 }
390
/*
 * hammer_vop_close { vp, fflag }
 *
 * No HAMMER-specific work is done on close; the request is passed
 * straight to vop_stdclose().
 */
static
int
hammer_vop_close(struct vop_close_args *ap)
{
        int error;

        error = vop_stdclose(ap);
        return (error);
}
400
401 /*
402  * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
403  *
404  * The operating system has already ensured that the directory entry
405  * does not exist and done all appropriate namespace locking.
406  */
407 static
408 int
409 hammer_vop_ncreate(struct vop_ncreate_args *ap)
410 {
411         struct hammer_transaction trans;
412         struct hammer_inode *dip;
413         struct hammer_inode *nip;
414         struct nchandle *nch;
415         int error;
416
417         nch = ap->a_nch;
418         dip = VTOI(ap->a_dvp);
419
420         /*
421          * Create a transaction to cover the operations we perform.
422          */
423         hammer_start_transaction(&trans, dip->hmp);
424
425         /*
426          * Create a new filesystem object of the requested type.  The
427          * returned inode will be referenced but not locked.
428          */
429
430         error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
431         if (error) {
432                 hammer_abort_transaction(&trans);
433                 *ap->a_vpp = NULL;
434                 return (error);
435         }
436
437         /*
438          * Add the new filesystem object to the directory.  This will also
439          * bump the inode's link count.
440          */
441         error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
442
443         /*
444          * Finish up.
445          */
446         if (error) {
447                 hammer_rel_inode(nip, 0);
448                 hammer_abort_transaction(&trans);
449                 *ap->a_vpp = NULL;
450         } else {
451                 hammer_commit_transaction(&trans);
452                 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
453                 hammer_rel_inode(nip, 0);
454                 if (error == 0) {
455                         cache_setunresolved(ap->a_nch);
456                         cache_setvp(ap->a_nch, *ap->a_vpp);
457                 }
458         }
459         return (error);
460 }
461
462 /*
463  * hammer_vop_getattr { vp, vap }
464  */
465 static
466 int
467 hammer_vop_getattr(struct vop_getattr_args *ap)
468 {
469         struct hammer_inode *ip = VTOI(ap->a_vp);
470         struct vattr *vap = ap->a_vap;
471
472 #if 0
473         if (cache_check_fsmid_vp(ap->a_vp, &ip->fsmid) &&
474             (vp->v_mount->mnt_flag & MNT_RDONLY) == 0 &&
475             ip->obj_asof == XXX
476         ) {
477                 /* LAZYMOD XXX */
478         }
479         hammer_itimes(ap->a_vp);
480 #endif
481
482         vap->va_fsid = ip->hmp->fsid_udev;
483         vap->va_fileid = ip->ino_rec.base.base.obj_id;
484         vap->va_mode = ip->ino_data.mode;
485         vap->va_nlink = ip->ino_rec.ino_nlinks;
486         vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
487         vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
488         vap->va_rmajor = 0;
489         vap->va_rminor = 0;
490         vap->va_size = ip->ino_rec.ino_size;
491         hammer_to_timespec(ip->ino_rec.ino_atime, &vap->va_atime);
492         hammer_to_timespec(ip->ino_rec.ino_mtime, &vap->va_mtime);
493         hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
494         vap->va_flags = ip->ino_data.uflags;
495         vap->va_gen = 1;        /* hammer inums are unique for all time */
496         vap->va_blocksize = 32768; /* XXX - extract from root volume */
497         vap->va_bytes = ip->ino_rec.ino_size;
498         vap->va_type = hammer_get_vnode_type(ip->ino_rec.base.base.obj_type);
499         vap->va_filerev = 0;    /* XXX */
500         /* mtime uniquely identifies any adjustments made to the file */
501         vap->va_fsmid = ip->ino_rec.ino_mtime;
502         vap->va_uid_uuid = ip->ino_data.uid;
503         vap->va_gid_uuid = ip->ino_data.gid;
504         vap->va_fsid_uuid = ip->hmp->fsid;
505         vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
506                           VA_FSID_UUID_VALID;
507
508         switch (ip->ino_rec.base.base.obj_type) {
509         case HAMMER_OBJTYPE_CDEV:
510         case HAMMER_OBJTYPE_BDEV:
511                 vap->va_rmajor = ip->ino_data.rmajor;
512                 vap->va_rminor = ip->ino_data.rminor;
513                 break;
514         default:
515                 break;
516         }
517
518         return(0);
519 }
520
521 /*
522  * hammer_vop_nresolve { nch, dvp, cred }
523  *
524  * Locate the requested directory entry.
525  */
526 static
527 int
528 hammer_vop_nresolve(struct vop_nresolve_args *ap)
529 {
530         struct namecache *ncp;
531         hammer_inode_t dip;
532         hammer_inode_t ip;
533         hammer_tid_t asof;
534         struct hammer_cursor cursor;
535         union hammer_record_ondisk *rec;
536         struct vnode *vp;
537         int64_t namekey;
538         int error;
539         int i;
540         int nlen;
541
542         /*
543          * Misc initialization, plus handle as-of name extensions.  Look for
544          * the '@@' extension.  Note that as-of files and directories cannot
545          * be modified.
546          *
547          *
548          */
549         dip = VTOI(ap->a_dvp);
550         ncp = ap->a_nch->ncp;
551         asof = dip->obj_asof;
552         nlen = ncp->nc_nlen;
553
554         for (i = 0; i < nlen; ++i) {
555                 if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
556                         asof = hammer_now_tid() - 
557                                strtoq(ncp->nc_name + i + 2, NULL, 0) *
558                                1000000000LL;
559                         kprintf("ASOF %016llx\n", asof);
560                         break;
561                 }
562         }
563         nlen = i;
564
565         /*
566          * Calculate the namekey and setup the key range for the scan.  This
567          * works kinda like a chained hash table where the lower 32 bits
568          * of the namekey synthesize the chain.
569          *
570          * The key range is inclusive of both key_beg and key_end.
571          */
572         namekey = hammer_directory_namekey(ncp->nc_name, nlen);
573
574         hammer_init_cursor_ip(&cursor, dip);
575         cursor.key_beg.obj_id = dip->obj_id;
576         cursor.key_beg.key = namekey;
577         cursor.key_beg.create_tid = asof;
578         cursor.key_beg.delete_tid = 0;
579         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
580         cursor.key_beg.obj_type = 0;
581
582         cursor.key_end = cursor.key_beg;
583         cursor.key_end.key |= 0xFFFFFFFFULL;
584         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
585
586         /*
587          * Scan all matching records (the chain), locate the one matching
588          * the requested path component.
589          *
590          * The hammer_ip_*() functions merge in-memory records with on-disk
591          * records for the purposes of the search.
592          */
593         error = hammer_ip_first(&cursor, dip);
594         while (error == 0) {
595                 error = hammer_ip_resolve_data(&cursor);
596                 if (error)
597                         break;
598                 rec = cursor.record;
599                 if (nlen == rec->entry.base.data_len &&
600                     bcmp(ncp->nc_name, cursor.data, nlen) == 0) {
601                         break;
602                 }
603                 error = hammer_ip_next(&cursor);
604         }
605         if (error == 0) {
606                 ip = hammer_get_inode(dip->hmp, rec->entry.obj_id,
607                                       asof, &error);
608                 if (error == 0) {
609                         error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
610                         hammer_rel_inode(ip, 0);
611                 } else {
612                         vp = NULL;
613                 }
614                 if (error == 0) {
615                         vn_unlock(vp);
616                         cache_setvp(ap->a_nch, vp);
617                         vrele(vp);
618                 }
619         } else if (error == ENOENT) {
620                 cache_setvp(ap->a_nch, NULL);
621         }
622         hammer_done_cursor(&cursor);
623         return (error);
624 }
625
626 /*
627  * hammer_vop_nlookupdotdot { dvp, vpp, cred }
628  *
629  * Locate the parent directory of a directory vnode.
630  *
631  * dvp is referenced but not locked.  *vpp must be returned referenced and
632  * locked.  A parent_obj_id of 0 does not necessarily indicate that we are
633  * at the root, instead it could indicate that the directory we were in was
634  * removed.
635  */
636 static
637 int
638 hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
639 {
640         struct hammer_inode *dip;
641         u_int64_t parent_obj_id;
642
643         dip = VTOI(ap->a_dvp);
644         if ((parent_obj_id = dip->ino_data.parent_obj_id) == 0) {
645                 *ap->a_vpp = NULL;
646                 return ENOENT;
647         }
648         return(hammer_vfs_vget(dip->hmp->mp, parent_obj_id, ap->a_vpp));
649 }
650
651 /*
652  * hammer_vop_nlink { nch, dvp, vp, cred }
653  */
654 static
655 int
656 hammer_vop_nlink(struct vop_nlink_args *ap)
657 {
658         struct hammer_transaction trans;
659         struct hammer_inode *dip;
660         struct hammer_inode *ip;
661         struct nchandle *nch;
662         int error;
663
664         nch = ap->a_nch;
665         dip = VTOI(ap->a_dvp);
666         ip = VTOI(ap->a_vp);
667
668         /*
669          * Create a transaction to cover the operations we perform.
670          */
671         hammer_start_transaction(&trans, dip->hmp);
672
673         /*
674          * Add the filesystem object to the directory.  Note that neither
675          * dip nor ip are referenced or locked, but their vnodes are
676          * referenced.  This function will bump the inode's link count.
677          */
678         error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip);
679
680         /*
681          * Finish up.
682          */
683         if (error) {
684                 hammer_abort_transaction(&trans);
685         } else {
686                 cache_setunresolved(nch);
687                 cache_setvp(nch, ap->a_vp);
688                 hammer_commit_transaction(&trans);
689         }
690         return (error);
691 }
692
693 /*
694  * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
695  *
696  * The operating system has already ensured that the directory entry
697  * does not exist and done all appropriate namespace locking.
698  */
699 static
700 int
701 hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
702 {
703         struct hammer_transaction trans;
704         struct hammer_inode *dip;
705         struct hammer_inode *nip;
706         struct nchandle *nch;
707         int error;
708
709         nch = ap->a_nch;
710         dip = VTOI(ap->a_dvp);
711
712         /*
713          * Create a transaction to cover the operations we perform.
714          */
715         hammer_start_transaction(&trans, dip->hmp);
716
717         /*
718          * Create a new filesystem object of the requested type.  The
719          * returned inode will be referenced but not locked.
720          */
721         error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
722         if (error) {
723                 hammer_abort_transaction(&trans);
724                 *ap->a_vpp = NULL;
725                 return (error);
726         }
727
728         /*
729          * Add the new filesystem object to the directory.  This will also
730          * bump the inode's link count.
731          */
732         error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
733
734         /*
735          * Finish up.
736          */
737         if (error) {
738                 hammer_rel_inode(nip, 0);
739                 hammer_abort_transaction(&trans);
740                 *ap->a_vpp = NULL;
741         } else {
742                 hammer_commit_transaction(&trans);
743                 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
744                 hammer_rel_inode(nip, 0);
745                 if (error == 0) {
746                         cache_setunresolved(ap->a_nch);
747                         cache_setvp(ap->a_nch, *ap->a_vpp);
748                 }
749         }
750         return (error);
751 }
752
753 /*
754  * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
755  *
756  * The operating system has already ensured that the directory entry
757  * does not exist and done all appropriate namespace locking.
758  */
759 static
760 int
761 hammer_vop_nmknod(struct vop_nmknod_args *ap)
762 {
763         struct hammer_transaction trans;
764         struct hammer_inode *dip;
765         struct hammer_inode *nip;
766         struct nchandle *nch;
767         int error;
768
769         nch = ap->a_nch;
770         dip = VTOI(ap->a_dvp);
771
772         /*
773          * Create a transaction to cover the operations we perform.
774          */
775         hammer_start_transaction(&trans, dip->hmp);
776
777         /*
778          * Create a new filesystem object of the requested type.  The
779          * returned inode will be referenced but not locked.
780          */
781         error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
782         if (error) {
783                 hammer_abort_transaction(&trans);
784                 *ap->a_vpp = NULL;
785                 return (error);
786         }
787
788         /*
789          * Add the new filesystem object to the directory.  This will also
790          * bump the inode's link count.
791          */
792         error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
793
794         /*
795          * Finish up.
796          */
797         if (error) {
798                 hammer_rel_inode(nip, 0);
799                 hammer_abort_transaction(&trans);
800                 *ap->a_vpp = NULL;
801         } else {
802                 hammer_commit_transaction(&trans);
803                 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
804                 hammer_rel_inode(nip, 0);
805                 if (error == 0) {
806                         cache_setunresolved(ap->a_nch);
807                         cache_setvp(ap->a_nch, *ap->a_vpp);
808                 }
809         }
810         return (error);
811 }
812
/*
 * hammer_vop_open { vp, mode, cred, fp }
 *
 * No HAMMER-specific work is done on open; the request is passed
 * straight to vop_stdopen().
 */
static
int
hammer_vop_open(struct vop_open_args *ap)
{
        int error;

        error = vop_stdopen(ap);
        return(error);
}
822
823 /*
824  * hammer_vop_pathconf { vp, name, retval }
825  */
826 static
827 int
828 hammer_vop_pathconf(struct vop_pathconf_args *ap)
829 {
830         return EOPNOTSUPP;
831 }
832
833 /*
834  * hammer_vop_print { vp }
835  */
836 static
837 int
838 hammer_vop_print(struct vop_print_args *ap)
839 {
840         return EOPNOTSUPP;
841 }
842
843 /*
844  * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
845  */
846 static
847 int
848 hammer_vop_readdir(struct vop_readdir_args *ap)
849 {
850         struct hammer_cursor cursor;
851         struct hammer_inode *ip;
852         struct uio *uio;
853         hammer_record_ondisk_t rec;
854         hammer_base_elm_t base;
855         int error;
856         int cookie_index;
857         int ncookies;
858         off_t *cookies;
859         off_t saveoff;
860         int r;
861
862         ip = VTOI(ap->a_vp);
863         uio = ap->a_uio;
864         hammer_init_cursor_ip(&cursor, ip);
865
866         /*
867          * Key range (begin and end inclusive) to scan.  Directory keys
868          * directly translate to a 64 bit 'seek' position.
869          */
870         cursor.key_beg.obj_id = ip->obj_id;
871         cursor.key_beg.create_tid = ip->obj_asof;
872         cursor.key_beg.delete_tid = 0;
873         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
874         cursor.key_beg.obj_type = 0;
875         cursor.key_beg.key = uio->uio_offset;
876
877         cursor.key_end = cursor.key_beg;
878         cursor.key_end.key = HAMMER_MAX_KEY;
879         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
880
881         if (ap->a_ncookies) {
882                 ncookies = uio->uio_resid / 16 + 1;
883                 if (ncookies > 1024)
884                         ncookies = 1024;
885                 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
886                 cookie_index = 0;
887         } else {
888                 ncookies = -1;
889                 cookies = NULL;
890                 cookie_index = 0;
891         }
892
893         saveoff = cursor.key_beg.key;
894         error = hammer_ip_first(&cursor, ip);
895
896         while (error == 0) {
897                 error = hammer_ip_resolve_data(&cursor);
898                 if (error)
899                         break;
900                 rec = cursor.record;
901                 base = &rec->base.base;
902                 saveoff = base->key;
903
904                 if (base->obj_id != ip->obj_id)
905                         panic("readdir: bad record at %p", cursor.node);
906
907                 r = vop_write_dirent(
908                              &error, uio, rec->entry.obj_id,
909                              hammer_get_dtype(rec->entry.base.base.obj_type),
910                              rec->entry.base.data_len,
911                              (void *)cursor.data);
912                 if (r)
913                         break;
914                 ++saveoff;
915                 if (cookies)
916                         cookies[cookie_index] = base->key;
917                 ++cookie_index;
918                 if (cookie_index == ncookies)
919                         break;
920                 error = hammer_ip_next(&cursor);
921         }
922         hammer_done_cursor(&cursor);
923
924         if (ap->a_eofflag)
925                 *ap->a_eofflag = (error == ENOENT);
926         uio->uio_offset = saveoff;
927         if (error && cookie_index == 0) {
928                 if (cookies) {
929                         kfree(cookies, M_TEMP);
930                         *ap->a_ncookies = 0;
931                         *ap->a_cookies = NULL;
932                 }
933         } else {
934                 if (error == ENOENT)
935                         error = 0;
936                 if (cookies) {
937                         *ap->a_ncookies = cookie_index;
938                         *ap->a_cookies = cookies;
939                 }
940         }
941         return(error);
942 }
943
944 /*
945  * hammer_vop_readlink { vp, uio, cred }
946  */
947 static
948 int
949 hammer_vop_readlink(struct vop_readlink_args *ap)
950 {
951         struct hammer_cursor cursor;
952         struct hammer_inode *ip;
953         int error;
954
955         ip = VTOI(ap->a_vp);
956         hammer_init_cursor_ip(&cursor, ip);
957
958         /*
959          * Key range (begin and end inclusive) to scan.  Directory keys
960          * directly translate to a 64 bit 'seek' position.
961          */
962         cursor.key_beg.obj_id = ip->obj_id;
963         cursor.key_beg.create_tid = ip->obj_asof;
964         cursor.key_beg.delete_tid = 0;
965         cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX;
966         cursor.key_beg.obj_type = 0;
967         cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK;
968
969         error = hammer_ip_lookup(&cursor, ip);
970         if (error == 0) {
971                 error = hammer_ip_resolve_data(&cursor);
972                 if (error == 0) {
973                         error = uiomove((char *)cursor.data,
974                                         cursor.record->generic.base.data_len,
975                                         ap->a_uio);
976                 }
977         }
978         hammer_done_cursor(&cursor);
979         return(error);
980 }
981
982 /*
983  * hammer_vop_nremove { nch, dvp, cred }
984  */
985 static
986 int
987 hammer_vop_nremove(struct vop_nremove_args *ap)
988 {
989         return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0));
990 }
991
992 /*
993  * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
994  */
995 static
996 int
997 hammer_vop_nrename(struct vop_nrename_args *ap)
998 {
999         struct hammer_transaction trans;
1000         struct namecache *fncp;
1001         struct namecache *tncp;
1002         struct hammer_inode *fdip;
1003         struct hammer_inode *tdip;
1004         struct hammer_inode *ip;
1005         struct hammer_cursor cursor;
1006         union hammer_record_ondisk *rec;
1007         int64_t namekey;
1008         int error;
1009
1010         fdip = VTOI(ap->a_fdvp);
1011         tdip = VTOI(ap->a_tdvp);
1012         fncp = ap->a_fnch->ncp;
1013         tncp = ap->a_tnch->ncp;
1014         hammer_start_transaction(&trans, fdip->hmp);
1015
1016         /*
1017          * Extract the hammer_inode from fncp and add link to the target
1018          * directory.
1019          */
1020         ip = VTOI(fncp->nc_vp);
1021         KKASSERT(ip != NULL);
1022
1023         error = hammer_ip_add_directory(&trans, tdip, tncp, ip);
1024
1025         /*
1026          * Locate the record in the originating directory and remove it.
1027          *
1028          * Calculate the namekey and setup the key range for the scan.  This
1029          * works kinda like a chained hash table where the lower 32 bits
1030          * of the namekey synthesize the chain.
1031          *
1032          * The key range is inclusive of both key_beg and key_end.
1033          */
1034         namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
1035
1036         hammer_init_cursor_ip(&cursor, fdip);
1037         cursor.key_beg.obj_id = fdip->obj_id;
1038         cursor.key_beg.key = namekey;
1039         cursor.key_beg.create_tid = fdip->obj_asof;
1040         cursor.key_beg.delete_tid = 0;
1041         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1042         cursor.key_beg.obj_type = 0;
1043
1044         cursor.key_end = cursor.key_beg;
1045         cursor.key_end.key |= 0xFFFFFFFFULL;
1046         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
1047
1048         /*
1049          * Scan all matching records (the chain), locate the one matching
1050          * the requested path component.
1051          *
1052          * The hammer_ip_*() functions merge in-memory records with on-disk
1053          * records for the purposes of the search.
1054          */
1055         error = hammer_ip_first(&cursor, fdip);
1056         while (error == 0) {
1057                 if (hammer_ip_resolve_data(&cursor) != 0)
1058                         break;
1059                 rec = cursor.record;
1060                 if (fncp->nc_nlen == rec->entry.base.data_len &&
1061                     bcmp(fncp->nc_name, cursor.data, fncp->nc_nlen) == 0) {
1062                         break;
1063                 }
1064                 error = hammer_ip_next(&cursor);
1065         }
1066
1067         /*
1068          * If all is ok we have to get the inode so we can adjust nlinks.
1069          */
1070         if (error)
1071                 goto done;
1072         error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);
1073         if (error == 0) {
1074                 cache_rename(ap->a_fnch, ap->a_tnch);
1075                 cache_setvp(ap->a_tnch, ip->vp);
1076         }
1077 done:
1078         hammer_done_cursor(&cursor);
1079         if (error == 0) {
1080                 hammer_commit_transaction(&trans);
1081         } else {
1082                 hammer_abort_transaction(&trans);
1083         }
1084         return (error);
1085 }
1086
1087 /*
1088  * hammer_vop_nrmdir { nch, dvp, cred }
1089  */
1090 static
1091 int
1092 hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
1093 {
1094         /* XXX check that directory is empty */
1095
1096         return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0));
1097 }
1098
1099 /*
1100  * hammer_vop_setattr { vp, vap, cred }
1101  */
1102 static
1103 int
1104 hammer_vop_setattr(struct vop_setattr_args *ap)
1105 {
1106         struct hammer_transaction trans;
1107         struct hammer_cursor *spike = NULL;
1108         struct vattr *vap;
1109         struct hammer_inode *ip;
1110         int modflags;
1111         int error;
1112         int64_t aligned_size;
1113         u_int32_t flags;
1114         uuid_t uuid;
1115
1116         vap = ap->a_vap;
1117         ip = ap->a_vp->v_data;
1118         modflags = 0;
1119
1120         if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
1121                 return(EROFS);
1122
1123         hammer_start_transaction(&trans, ip->hmp);
1124         error = 0;
1125
1126         if (vap->va_flags != VNOVAL) {
1127                 flags = ip->ino_data.uflags;
1128                 error = vop_helper_setattr_flags(&flags, vap->va_flags,
1129                                          hammer_to_unix_xid(&ip->ino_data.uid),
1130                                          ap->a_cred);
1131                 if (error == 0) {
1132                         if (ip->ino_data.uflags != flags) {
1133                                 ip->ino_data.uflags = flags;
1134                                 modflags |= HAMMER_INODE_DDIRTY;
1135                         }
1136                         if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
1137                                 error = 0;
1138                                 goto done;
1139                         }
1140                 }
1141                 goto done;
1142         }
1143         if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
1144                 error = EPERM;
1145                 goto done;
1146         }
1147         if (vap->va_uid != (uid_t)VNOVAL) {
1148                 hammer_guid_to_uuid(&uuid, vap->va_uid);
1149                 if (bcmp(&uuid, &ip->ino_data.uid, sizeof(uuid)) != 0) {
1150                         ip->ino_data.uid = uuid;
1151                         modflags |= HAMMER_INODE_DDIRTY;
1152                 }
1153         }
1154         if (vap->va_gid != (uid_t)VNOVAL) {
1155                 hammer_guid_to_uuid(&uuid, vap->va_gid);
1156                 if (bcmp(&uuid, &ip->ino_data.gid, sizeof(uuid)) != 0) {
1157                         ip->ino_data.gid = uuid;
1158                         modflags |= HAMMER_INODE_DDIRTY;
1159                 }
1160         }
1161         while (vap->va_size != VNOVAL && ip->ino_rec.ino_size != vap->va_size) {
1162                 switch(ap->a_vp->v_type) {
1163                 case VREG:
1164                         if (vap->va_size < ip->ino_rec.ino_size) {
1165                                 vtruncbuf(ap->a_vp, vap->va_size,
1166                                           HAMMER_BUFSIZE);
1167                         } else if (vap->va_size > ip->ino_rec.ino_size) {
1168                                 vnode_pager_setsize(ap->a_vp, vap->va_size);
1169                         }
1170                         aligned_size = (vap->va_size + HAMMER_BUFMASK) &
1171                                         ~(int64_t)HAMMER_BUFMASK;
1172                         error = hammer_ip_delete_range(&trans, ip,
1173                                                     aligned_size,
1174                                                     0x7FFFFFFFFFFFFFFFLL,
1175                                                     &spike);
1176                         ip->ino_rec.ino_size = vap->va_size;
1177                         modflags |= HAMMER_INODE_RDIRTY;
1178                         break;
1179                 case VDATABASE:
1180                         error = hammer_ip_delete_range(&trans, ip,
1181                                                     vap->va_size,
1182                                                     0x7FFFFFFFFFFFFFFFLL,
1183                                                     &spike);
1184                         ip->ino_rec.ino_size = vap->va_size;
1185                         modflags |= HAMMER_INODE_RDIRTY;
1186                         break;
1187                 default:
1188                         error = EINVAL;
1189                         goto done;
1190                 }
1191                 if (error == ENOSPC) {
1192                         error = hammer_spike(&spike);
1193                         if (error == 0)
1194                                 continue;
1195                 }
1196                 KKASSERT(spike == NULL);
1197                 break;
1198         }
1199         if (vap->va_atime.tv_sec != VNOVAL) {
1200                 ip->ino_rec.ino_atime =
1201                         hammer_timespec_to_transid(&vap->va_atime);
1202                 modflags |= HAMMER_INODE_ITIMES;
1203         }
1204         if (vap->va_mtime.tv_sec != VNOVAL) {
1205                 ip->ino_rec.ino_mtime =
1206                         hammer_timespec_to_transid(&vap->va_mtime);
1207                 modflags |= HAMMER_INODE_ITIMES;
1208         }
1209         if (vap->va_mode != (mode_t)VNOVAL) {
1210                 if (ip->ino_data.mode != vap->va_mode) {
1211                         ip->ino_data.mode = vap->va_mode;
1212                         modflags |= HAMMER_INODE_DDIRTY;
1213                 }
1214         }
1215 done:
1216         if (error) {
1217                 hammer_abort_transaction(&trans);
1218         } else {
1219                 if (modflags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY))
1220                         modflags |= HAMMER_INODE_TID;
1221                 hammer_modify_inode(&trans, ip, modflags);
1222                 hammer_commit_transaction(&trans);
1223         }
1224         return (error);
1225 }
1226
1227 /*
1228  * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1229  */
1230 static
1231 int
1232 hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
1233 {
1234         struct hammer_transaction trans;
1235         struct hammer_inode *dip;
1236         struct hammer_inode *nip;
1237         struct nchandle *nch;
1238         hammer_record_t record;
1239         int error;
1240         int bytes;
1241
1242         ap->a_vap->va_type = VLNK;
1243
1244         nch = ap->a_nch;
1245         dip = VTOI(ap->a_dvp);
1246
1247         /*
1248          * Create a transaction to cover the operations we perform.
1249          */
1250         hammer_start_transaction(&trans, dip->hmp);
1251
1252         /*
1253          * Create a new filesystem object of the requested type.  The
1254          * returned inode will be referenced but not locked.
1255          */
1256
1257         error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
1258         if (error) {
1259                 hammer_abort_transaction(&trans);
1260                 *ap->a_vpp = NULL;
1261                 return (error);
1262         }
1263
1264         /*
1265          * Add the new filesystem object to the directory.  This will also
1266          * bump the inode's link count.
1267          */
1268         error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
1269
1270         /*
1271          * Add a record representing the symlink.  symlink stores the link
1272          * as pure data, not a string, and is no \0 terminated.
1273          */
1274         if (error == 0) {
1275                 record = hammer_alloc_mem_record(nip);
1276                 bytes = strlen(ap->a_target);
1277
1278                 record->rec.generic.base.base.key = HAMMER_FIXKEY_SYMLINK;
1279                 record->rec.generic.base.base.rec_type = HAMMER_RECTYPE_FIX;
1280                 record->rec.generic.base.data_len = bytes;
1281                 if (bytes <= sizeof(record->rec.generic.filler)) {
1282                         record->data = (void *)record->rec.generic.filler;
1283                         bcopy(ap->a_target, record->data, bytes);
1284                 } else {
1285                         record->data = (void *)ap->a_target;
1286                         /* will be reallocated by routine below */
1287                 }
1288                 error = hammer_ip_add_record(&trans, record);
1289         }
1290
1291         /*
1292          * Finish up.
1293          */
1294         if (error) {
1295                 hammer_rel_inode(nip, 0);
1296                 hammer_abort_transaction(&trans);
1297                 *ap->a_vpp = NULL;
1298         } else {
1299                 hammer_commit_transaction(&trans);
1300                 error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
1301                 hammer_rel_inode(nip, 0);
1302                 if (error == 0) {
1303                         cache_setunresolved(ap->a_nch);
1304                         cache_setvp(ap->a_nch, *ap->a_vpp);
1305                 }
1306         }
1307         return (error);
1308 }
1309
1310 /*
1311  * hammer_vop_nwhiteout { nch, dvp, cred, flags }
1312  */
1313 static
1314 int
1315 hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
1316 {
1317         return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, ap->a_flags));
1318 }
1319
1320 /*
1321  * hammer_vop_strategy { vp, bio }
1322  *
1323  * Strategy call, used for regular file read & write only.  Note that the
1324  * bp may represent a cluster.
1325  *
1326  * To simplify operation and allow better optimizations in the future,
1327  * this code does not make any assumptions with regards to buffer alignment
1328  * or size.
1329  */
1330 static
1331 int
1332 hammer_vop_strategy(struct vop_strategy_args *ap)
1333 {
1334         struct buf *bp;
1335         int error;
1336
1337         bp = ap->a_bio->bio_buf;
1338
1339         switch(bp->b_cmd) {
1340         case BUF_CMD_READ:
1341                 error = hammer_vop_strategy_read(ap);
1342                 break;
1343         case BUF_CMD_WRITE:
1344                 error = hammer_vop_strategy_write(ap);
1345                 break;
1346         default:
1347                 error = EINVAL;
1348                 break;
1349         }
1350         bp->b_error = error;
1351         if (error)
1352                 bp->b_flags |= B_ERROR;
1353         biodone(ap->a_bio);
1354         return (error);
1355 }
1356
1357 /*
1358  * Read from a regular file.  Iterate the related records and fill in the
1359  * BIO/BUF.  Gaps are zero-filled.
1360  *
1361  * The support code in hammer_object.c should be used to deal with mixed
1362  * in-memory and on-disk records.
1363  *
1364  * XXX atime update
1365  */
1366 static
1367 int
1368 hammer_vop_strategy_read(struct vop_strategy_args *ap)
1369 {
1370         struct hammer_inode *ip = ap->a_vp->v_data;
1371         struct hammer_cursor cursor;
1372         hammer_record_ondisk_t rec;
1373         hammer_base_elm_t base;
1374         struct bio *bio;
1375         struct buf *bp;
1376         int64_t rec_offset;
1377         int64_t ran_end;
1378         int64_t tmp64;
1379         int error;
1380         int boff;
1381         int roff;
1382         int n;
1383
1384         bio = ap->a_bio;
1385         bp = bio->bio_buf;
1386
1387         hammer_init_cursor_ip(&cursor, ip);
1388
1389         /*
1390          * Key range (begin and end inclusive) to scan.  Note that the key's
1391          * stored in the actual records represent BASE+LEN, not BASE.  The
1392          * first record containing bio_offset will have a key > bio_offset.
1393          */
1394         cursor.key_beg.obj_id = ip->obj_id;
1395         cursor.key_beg.create_tid = ip->obj_asof;
1396         cursor.key_beg.delete_tid = 0;
1397         cursor.key_beg.obj_type = 0;
1398         cursor.key_beg.key = bio->bio_offset + 1;
1399
1400         cursor.key_end = cursor.key_beg;
1401         if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
1402                 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
1403                 cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
1404                 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
1405         } else {
1406                 ran_end = bio->bio_offset + bp->b_bufsize;
1407                 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
1408                 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
1409                 tmp64 = ran_end + MAXPHYS + 1;  /* work-around GCC-4 bug */
1410                 if (tmp64 < ran_end)
1411                         cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
1412                 else
1413                         cursor.key_end.key = ran_end + MAXPHYS + 1;
1414         }
1415         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
1416
1417         error = hammer_ip_first(&cursor, ip);
1418         boff = 0;
1419
1420         while (error == 0) {
1421                 error = hammer_ip_resolve_data(&cursor);
1422                 if (error)
1423                         break;
1424                 rec = cursor.record;
1425                 base = &rec->base.base;
1426
1427                 rec_offset = base->key - rec->data.base.data_len;
1428
1429                 /*
1430                  * Calculate the gap, if any, and zero-fill it.
1431                  */
1432                 n = (int)(rec_offset - (bio->bio_offset + boff));
1433                 if (n > 0) {
1434                         if (n > bp->b_bufsize - boff)
1435                                 n = bp->b_bufsize - boff;
1436                         kprintf("zfill %d bytes\n", n);
1437                         bzero((char *)bp->b_data + boff, n);
1438                         boff += n;
1439                         n = 0;
1440                 }
1441
1442                 /*
1443                  * Calculate the data offset in the record and the number
1444                  * of bytes we can copy.
1445                  *
1446                  * Note there is a degenerate case here where boff may
1447                  * already be at bp->b_bufsize.
1448                  */
1449                 roff = -n;
1450                 n = rec->data.base.data_len - roff;
1451                 KKASSERT(n > 0);
1452                 if (n > bp->b_bufsize - boff)
1453                         n = bp->b_bufsize - boff;
1454                 bcopy((char *)cursor.data + roff, (char *)bp->b_data + boff, n);
1455                 boff += n;
1456                 if (boff == bp->b_bufsize)
1457                         break;
1458                 error = hammer_ip_next(&cursor);
1459         }
1460         hammer_done_cursor(&cursor);
1461
1462         /*
1463          * There may have been a gap after the last record
1464          */
1465         if (error == ENOENT)
1466                 error = 0;
1467         if (error == 0 && boff != bp->b_bufsize) {
1468                 KKASSERT(boff < bp->b_bufsize);
1469                 bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
1470                 /* boff = bp->b_bufsize; */
1471         }
1472         bp->b_resid = 0;
1473         return(error);
1474 }
1475
1476 /*
1477  * Write to a regular file.  Iterate the related records and mark for
1478  * deletion.  If existing edge records (left and right side) overlap our
1479  * write they have to be marked deleted and new records created, usually
1480  * referencing a portion of the original data.  Then add a record to
1481  * represent the buffer.
1482  *
1483  * The support code in hammer_object.c should be used to deal with mixed
1484  * in-memory and on-disk records.
1485  */
1486 static
1487 int
1488 hammer_vop_strategy_write(struct vop_strategy_args *ap)
1489 {
1490         struct hammer_transaction trans;
1491         struct hammer_cursor *spike = NULL;
1492         hammer_inode_t ip;
1493         struct bio *bio;
1494         struct buf *bp;
1495         int error;
1496
1497         bio = ap->a_bio;
1498         bp = bio->bio_buf;
1499         ip = ap->a_vp->v_data;
1500         hammer_start_transaction(&trans, ip->hmp);
1501
1502 retry:
1503         /*
1504          * Delete any records overlapping our range.  This function will
1505          * (eventually) properly truncate partial overlaps.
1506          */
1507         if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
1508                 error = hammer_ip_delete_range(&trans, ip, bio->bio_offset,
1509                                                bio->bio_offset, &spike);
1510         } else {
1511                 error = hammer_ip_delete_range(&trans, ip, bio->bio_offset,
1512                                                bio->bio_offset +
1513                                                 bp->b_bufsize - 1,
1514                                                &spike);
1515         }
1516
1517         /*
1518          * Add a single record to cover the write
1519          */
1520         if (error == 0) {
1521                 error = hammer_ip_sync_data(&trans, ip, bio->bio_offset,
1522                                             bp->b_data, bp->b_bufsize,
1523                                             &spike);
1524         }
1525
1526         /*
1527          * If we ran out of space the spike structure will be filled in
1528          * and we must call hammer_spike with it, then retry.
1529          */
1530         if (error == ENOSPC) {
1531                 error = hammer_spike(&spike);
1532                 if (error == 0)
1533                         goto retry;
1534         }
1535         KKASSERT(spike == NULL);
1536
1537         /*
1538          * If an error occured abort the transaction
1539          */
1540         if (error) {
1541                 /* XXX undo deletion */
1542                 hammer_abort_transaction(&trans);
1543                 bp->b_resid = bp->b_bufsize;
1544         } else {
1545                 hammer_commit_transaction(&trans);
1546                 bp->b_resid = 0;
1547         }
1548         return(error);
1549 }
1550
1551 /*
1552  * dounlink - disconnect a directory entry
1553  *
1554  * XXX whiteout support not really in yet
1555  */
1556 static int
1557 hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred,
1558                 int flags)
1559 {
1560         struct hammer_transaction trans;
1561         struct namecache *ncp;
1562         hammer_inode_t dip;
1563         hammer_inode_t ip;
1564         hammer_record_ondisk_t rec;
1565         struct hammer_cursor cursor;
1566         int64_t namekey;
1567         int error;
1568
1569         /*
1570          * Calculate the namekey and setup the key range for the scan.  This
1571          * works kinda like a chained hash table where the lower 32 bits
1572          * of the namekey synthesize the chain.
1573          *
1574          * The key range is inclusive of both key_beg and key_end.
1575          */
1576         dip = VTOI(dvp);
1577         ncp = nch->ncp;
1578         namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
1579
1580         hammer_init_cursor_ip(&cursor, dip);
1581         cursor.key_beg.obj_id = dip->obj_id;
1582         cursor.key_beg.key = namekey;
1583         cursor.key_beg.create_tid = dip->obj_asof;
1584         cursor.key_beg.delete_tid = 0;
1585         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1586         cursor.key_beg.obj_type = 0;
1587
1588         cursor.key_end = cursor.key_beg;
1589         cursor.key_end.key |= 0xFFFFFFFFULL;
1590         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
1591
1592         hammer_start_transaction(&trans, dip->hmp);
1593
1594         /*
1595          * Scan all matching records (the chain), locate the one matching
1596          * the requested path component.  info->last_error contains the
1597          * error code on search termination and could be 0, ENOENT, or
1598          * something else.
1599          *
1600          * The hammer_ip_*() functions merge in-memory records with on-disk
1601          * records for the purposes of the search.
1602          */
1603         error = hammer_ip_first(&cursor, dip);
1604         while (error == 0) {
1605                 error = hammer_ip_resolve_data(&cursor);
1606                 if (error)
1607                         break;
1608                 rec = cursor.record;
1609                 if (ncp->nc_nlen == rec->entry.base.data_len &&
1610                     bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) {
1611                         break;
1612                 }
1613                 error = hammer_ip_next(&cursor);
1614         }
1615
1616         /*
1617          * If all is ok we have to get the inode so we can adjust nlinks.
1618          */
1619         if (error == 0) {
1620                 ip = hammer_get_inode(dip->hmp, rec->entry.obj_id,
1621                                       dip->hmp->asof, &error);
1622                 if (error == 0)
1623                         error = hammer_ip_del_directory(&trans, &cursor, dip, ip);
1624                 if (error == 0) {
1625                         cache_setunresolved(nch);
1626                         cache_setvp(nch, NULL);
1627                         /* XXX locking */
1628                         if (ip->vp)
1629                                 cache_inval_vp(ip->vp, CINV_DESTROY);
1630                 }
1631                 hammer_rel_inode(ip, 0);
1632
1633                 if (error == 0) {
1634                         hammer_commit_transaction(&trans);
1635                 } else {
1636                         hammer_abort_transaction(&trans);
1637                 }
1638         }
1639         hammer_done_cursor(&cursor);
1640         return (error);
1641 }
1642
1643 /************************************************************************
1644  *                          FIFO AND SPECFS OPS                         *
1645  ************************************************************************
1646  *
1647  */
1648
1649 static int
1650 hammer_vop_fifoclose (struct vop_close_args *ap)
1651 {
1652         /* XXX update itimes */
1653         return (VOCALL(&fifo_vnode_vops, &ap->a_head));
1654 }
1655
1656 static int
1657 hammer_vop_fiforead (struct vop_read_args *ap)
1658 {
1659         int error;
1660
1661         error = VOCALL(&fifo_vnode_vops, &ap->a_head);
1662         /* XXX update access time */
1663         return (error);
1664 }
1665
1666 static int
1667 hammer_vop_fifowrite (struct vop_write_args *ap)
1668 {
1669         int error;
1670
1671         error = VOCALL(&fifo_vnode_vops, &ap->a_head);
1672         /* XXX update access time */
1673         return (error);
1674 }
1675
1676 static int
1677 hammer_vop_specclose (struct vop_close_args *ap)
1678 {
1679         /* XXX update itimes */
1680         return (VOCALL(&spec_vnode_vops, &ap->a_head));
1681 }
1682
1683 static int
1684 hammer_vop_specread (struct vop_read_args *ap)
1685 {
1686         /* XXX update access time */
1687         return (VOCALL(&spec_vnode_vops, &ap->a_head));
1688 }
1689
1690 static int
1691 hammer_vop_specwrite (struct vop_write_args *ap)
1692 {
1693         /* XXX update last change time */
1694         return (VOCALL(&spec_vnode_vops, &ap->a_head));
1695 }
1696