dm - undo my pointless moving of dm.h
[dragonfly.git] / sys / dev / disk / dm / targets / dmirror / dm_target_dmirror.c
CommitLineData
919cd235
AH
1/*
2 * Copyright (c) 2010 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Alex Hornung <ahornung@gmail.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35/*
36 * This file implements an initial version of a mirror target.
37 */
#include <sys/types.h>
#include <sys/param.h>

#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/uuid.h>
#include <sys/vnode.h>

#include <machine/atomic.h>

#include <dev/disk/dm/dm.h>
919cd235
AH
/*
 * Defines the kernel malloc type M_DMDMIRROR (short name "dm_dmirror").
 * Nothing in the visible part of this file allocates with it yet.
 */
48MALLOC_DEFINE(M_DMDMIRROR, "dm_dmirror", "Device Mapper Target DMIRROR");
49
/*
 * segdesc flags: bit values stored in segdesc_t.flags.
 */
#define MEDIA_UNSTABLE		0x0001
#define MEDIA_READ_DEGRADED	0x0002
#define MEDIA_WRITE_DEGRADED	0x0004
#define MEDIA_MASTER		0x0008
#define UNINITIALIZED		0x0010
#define OLD_UNSTABLE		0x0020
#define OLD_MASTER		0x0040
/* Misspelled original name of OLD_MASTER; kept so existing users still build. */
#define OLD_MSATER		OLD_MASTER

/*
 * dmirror disk flags: bit values stored in dmirror_disk_t.flags.
 */
#define DISK_ONLINE		0x0001
61
62
/*
 * Per-bio bookkeeping kept in the bio_caller_info slots.
 *
 *   bio_caller_info1.ptr     disk the bio was issued to
 *   bio_caller_info2.offset  segment number of the request
 *   bio_caller_info3.value   retry counter (set on the read path)
 *   bio_caller_info3.ptr     parent ("master") bio of a clone (write path)
 *
 * The retry counter and the parent pointer both live in bio_caller_info3.
 * NOTE(review): presumably that slot is a union, so the two alias each
 * other; a single bio must never need both at the same time.
 *
 * The getters accept a NULL bio and then yield NULL/0.
 */
#define dmirror_set_bio_disk(bio, x)	((bio)->bio_caller_info1.ptr = (x))
#define dmirror_get_bio_disk(bio)	((bio)?((bio)->bio_caller_info1.ptr):NULL)

#define dmirror_set_bio_segno(bio, x)	((bio)->bio_caller_info2.offset = (x))
/* Older spelling of dmirror_set_bio_segno(); the code in this file uses _segno. */
#define dmirror_set_bio_seg(bio, x)	dmirror_set_bio_segno(bio, x)
#define dmirror_get_bio_segno(bio)	((bio)?((bio)->bio_caller_info2.offset):0)

#define dmirror_set_bio_retries(bio, x)	((bio)->bio_caller_info3.value = (x))
#define dmirror_get_bio_retries(bio)	((bio)?((bio)->bio_caller_info3.value):0)

#define dmirror_set_bio_mbuf(bio, x)	((bio)->bio_caller_info3.ptr = (x))
#define dmirror_get_bio_mbuf(bio)	((bio)?((bio)->bio_caller_info3.ptr):NULL)
73
74
75
/*
 * Segment descriptor; there is one per logical segment.
 * The flags member carries the MEDIA_* and related state bits.
 */
struct segdesc {
	uint32_t	flags;		/* Flags, including state */
	uint32_t	zf_bitmap;	/* Zero-fill bitmap */
	uint8_t		disk_no;
	uint8_t		spare1;
	uint16_t	spare2;
	uint32_t	spare3;
	/* XXX: some timestamp/serial */
};

typedef struct segdesc segdesc_t;
86
87typedef struct dmirror_disk {
88 uint32_t flags;
89 dm_pdev_t *pdev;
90} dmirror_disk_t;
91
92typedef struct target_dmirror_config {
93 size_t params_len;
94 dmirror_disk_t disks[4];
95 uint8_t ndisks;
96 /* XXX: uuid stuff */
97
98} dm_target_dmirror_config_t;
99
100static
101struct bio*
102dmirror_clone_bio(struct bio *obio)
103{
104 struct bio *bio;
105 struct buf *mbp;
106 struct buf *bp;
107
108 mbp = obio->bio_buf;
109 bp = getpbuf(NULL);
110
111 BUF_KERNPROC(bp);
112 bp->b_vp = mbp->b_vp;
113 bp->b_cmd = mbp->b_cmd;
114 bp->b_data = (char *)mbp->b_data;
115 bp->b_resid = bp->b_bcount = mbp->b_bcount;
116 bp->b_bufsize = bp->b_bcount;
117
118 bio = &bp->b_bio1;
119 bio->bio_offset = obio->bio_offset;
120
121 return (bio);
122}
123
124static void
125dmirror_write_done(struct bio *bio)
126{
127 dmirror_disk_t disk;
128 off_t segno;
129 struct bio *obio, *mbio;
130 int retries;
131
132 disk = dmirror_get_bio_disk(bio);
133 segno = dmirror_get_bio_segno(bio);
134 mbio = dmirror_get_bio_mbuf(bio);
135
136 if (bio->bio_buf->b_flags & B_ERROR) {
137 /* write failed */
138 }
139
140 obio = pop_bio(bio);
141 biodone(obio);
142}
143
144void
145dmirror_issue_write(dmirror_disk_t disk, struct bio *bio)
146{
147 dmirror_set_bio_disk(bio, disk);
148 dmirror_set_bio_segno(bio, SEGNO_FROM_OFFSET(bio->bio_offset));
149
150 bio->bio_done = dmirror_write_done;
151 vn_strategy(disk->pdev, bio);
152}
153
154void
155dmirror_write(dm_target_crypt_config_t config, struct bio *bio)
156{
157 dmirror_disk_t disk, m_disk;
158 struct bio *wbio1, *wbio2;
159 segdesc_t segdesc;
160 int i, masters = 0;
161
162 for(i = 0; i < XXX config->ndisks; i++) {
163 disk = &config->disks[i];
164 segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset);
165 if (segdesc->flags & MEDIA_MASTER) {
166 if (++masters == 1)
167 m_disk = disk;
168 }
169 }
170
171 if (masters == 1) {
172 dmirror_set_bio_mbuf(bio, NULL);
173 dmirror_issue_write(m_disk, bio);
174 } else {
175 wbio1 = dmirror_clone_bio(bio);
176 wbio2 = dmirror_clone_bio(bio);
177 dmirror_set_bio_mbuf(wbio1, bio);
178 dmirror_set_bio_mbuf(wbio2, bio);
179 dmirror_issue_write(XXX disk1, wbio1);
180 dmirror_issue_write(XXX disk2, wbio2);
181 }
182
183}
184
185static void
186segdesc_set_flag(dmirror_disk_t disk, off_t segno, int flag)
187{
188 /*
189 * XXX: set the flag on the in-memory descriptor and write back to disks.
190 */
191 foo |= flag;
192}
193
194
195static void
196segdesc_clear_flag(dmirror_disk_t disk, off_t segno, int flag)
197{
198 /*
199 * XXX: set the flag on the in-memory descriptor and write back to disks.
200 */
201 foo &= ~flag;
202}
203
204static void
205dmirror_read_done(struct bio *bio)
206{
207 dmirror_disk_t disk;
208 off_t segno;
209 struct bio *obio;
210 int retries;
211
212 disk = dmirror_get_bio_disk(bio);
213 segno = dmirror_get_bio_segno(bio);
214 retries = dmirror_get_bio_retries(bio);
215
216 if (bio->bio_buf->b_flags & B_ERROR) {
217 /* read failed, so redispatch to a different disk */
218 segdesc_set_flag(disk, segno, MEDIA_READ_DEGRADED);
219 /* XXX: set other disk to master, if possible */
220 if (retries < disk->config->max_retries) {
221 dmirror_set_bio_retries(bio, retries + 1);
222 /*
223 * XXX: how do we restore the bio to health? Like this?
224 */
225 bio->bio_buf->b_flags &= ~(B_ERROR | B_INVAL);
226 /*
227 * XXX: something tells me that dispatching stuff from a
228 * biodone routine is not the greatest idea
229 */
230 dmirror_issue_read(next_disk, bio);
231 return;
232 }
233 }
234
235 obio = pop_bio(bio);
236 biodone(obio);
237}
238
239void
240dmirror_issue_read(dmirror_disk_t disk, struct bio *bio)
241{
242 dmirror_set_bio_disk(bio, disk);
243 dmirror_set_bio_segno(bio, SEGNO_FROM_OFFSET(bio->bio_offset));
244
245 bio->bio_done = dmirror_read_done;
246 vn_strategy(disk->pdev, bio);
247}
248
249void
250dmirror_read(dm_target_crypt_config_t config, struct bio *bio)
251{
252 dmirror_disk_t disk, m_disk;
253 segdesc_t segdesc;
254 int i, masters = 0;
255
256 for(i = 0; i < XXX config->ndisks; i++) {
257 disk = &config->disks[i];
258 segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset);
259 if (segdesc->flags & MEDIA_MASTER) {
260 if (++masters == 1)
261 m_disk = disk;
262 }
263 }
264
265 if (masters > 1) {
266 /* XXX: fail. */
267 biodone(foo);
268 return;
269 }
270
271 if (masters == 1) {
272 segdesc = SEGDESC_FROM_OFFSET(m_disk, bio->bio_offset);
273 if (segdesc->flags & UNINITIALIZED) {
274 /* XXX: ... */
275 }
276 dmirror_issue_read(m_disk, bio);
277 } else {
278 /* dispatch read to any disk */
279 /* but try not to send to a READ_DEGRADED drive */
280 m_disk = NULL;
281 for (i = 0; i < config->ndisks; i++) {
282 disk = &config->disks[i];
283 segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset);
284 if (!(segdesc->flags & MEDIA_READ_DEGRADED)) {
285 m_disk = disk;
286 break;
287 }
288 }
289 /* XXX: do the uninitialized magic here, too */
290 if (m_disk) {
291 /*
292 * XXX: we found some non-degraded disk. We might want to
293 * optimize performance by sending reads to different disks,
294 * not just the first one.
295 */
296 dmirror_set_bio_retries(bio, 0);
297 dmirror_issue_read(m_disk, bio);
298 } else {
299 /* XXX: all disks are read degraded, just sent to any */
300 m_disk = &config->disks[i];
301 dmirror_set_bio_retries(bio, 0);
302 dmirror_issue_read(m_disk, bio);
303 }
304 }
305}
306
307/* Strategy routine called from dm_strategy. */
308/*
309 * Do IO operation, called from dmstrategy routine.
310 */
311int
312dm_target_dmirror_strategy(dm_table_entry_t * table_en, struct buf * bp)
313{
314 struct bio *bio, *split_bio1, *split_bio2;
315 struct buf *bp;
316 off_t bseg, eseg, seg_end;
317 size_t fsb;
318 int split_transaction = 0;
319
320 dm_target_crypt_config_t *priv;
321 priv = table_en->target_config;
322
323 if ((bp->b_cmd == BUF_CMD_READ) || (bp->b_cmd == BUF_CMD_WRITE)) {
324 /* Get rid of stuff we can't really handle */
325 if (((bp->b_bcount % DEV_BSIZE) != 0) || (bp->b_bcount == 0)) {
326 kprintf("dm_target_dmirror_strategy: can't really handle bp->b_bcount = %d\n", bp->b_bcount);
327 bp->b_error = EINVAL;
328 bp->b_flags |= B_ERROR | B_INVAL;
329 biodone(&bp->b_bio1);
330 return 0;
331 }
332
333 bseg = SEGNO_FROM_OFFSET(bp->b_bio1.bio_offset);
334 eseg = SEGNO_FROM_OFFSET(bp->b_bio1.bio_offset + bp->b_resid);
335 seg_end = OFFSET_FROM_SEGNO(eseg);
336
337 if (bseg != eseg) {
338 split_transaction = 1;
339 /* fsb = first segment bytes (bytes in the first segment) */
340 fsb = seg_end - bp->b_bio1.bio_offset;
341
342 nestbuf = getpbuf(NULL);
343 nestiobuf_setup(&bp->b_bio1, nestbuf, 0, fsb);
344 split_bio1 = push_bio(&nestbuf->b_bio1);
345 split_bio1->bio_offset = bp->b_bio1.bio_offset +
346 priv->block_offset*DEV_BSIZE;
347
348 nestbuf = getpbuf(NULL);
349 nestiobuf_setup(&bp->b_bio1, nestbuf, fsb, bp->b_resid - fsb);
350 split_bio2 = push_bio(&nestbuf->b_bio1);
351 split_bio2->bio_offset = bp->b_bio1.bio_offset + fsb +
352 priv->block_offset*DEV_BSIZE;
353 }
354 }
355
356 switch (bp->b_cmd) {
357 case BUF_CMD_READ:
358 if (split_transaction) {
359 dmirror_read(priv, split_bio1);
360 dmirror_read(priv, split_bio2);
361 } else {
362 bio = push_bio(&bp->b_bio1);
363 bio->bio_offset = bp->b_bio1.bio_offset + priv->block_offset*DEV_BSIZE;
364 dmirror_read(priv, bio);
365 }
366 break;
367
368 case BUF_CMD_WRITE:
369 if (split_transaction) {
370 dmirror_write(priv, split_bio1);
371 dmirror_write(priv, split_bio2);
372 } else {
373 bio = push_bio(&bp->b_bio1);
374 bio->bio_offset = bp->b_bio1.bio_offset + priv->block_offset*DEV_BSIZE;
375 dmirror_write(priv, bio);
376 }
377 break;
378
379 default:
380 /* XXX: clone... */
381 vn_strategy(priv->pdev[0]->pdev_vnode, &bp->b_bio1);
382 vn_strategy(priv->pdev[1]->pdev_vnode, &bp->b_bio1);
383 }
384
385 return 0;
386
387}
388
389/* XXX: add missing dm functions */