2 * Copyright (c) 2005-2008 Daniel Braniss <danny@cs.huji.ac.il>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * $FreeBSD: src/sys/dev/iscsi/initiator/isc_soc.c,v 1.6 2009/06/25 18:46:30 kib Exp $
30 | $Id: isc_soc.c,v 1.26 2007/05/19 06:09:01 danny Exp danny $
33 #include "opt_iscsi_initiator.h"
35 #include <sys/param.h>
36 #include <sys/kernel.h>
38 #include <sys/systm.h>
39 #include <sys/malloc.h>
40 #include <sys/ctype.h>
41 #include <sys/errno.h>
42 #include <sys/sysctl.h>
45 #include <sys/socketvar.h>
46 #include <sys/socket.h>
47 #include <sys/protosw.h>
49 #include <sys/ioccom.h>
50 #include <sys/queue.h>
51 #include <sys/kthread.h>
52 #include <sys/syslog.h>
56 #include <sys/eventhandler.h>
57 #include <sys/socketops.h>
59 #include <sys/thread2.h>
60 #include <sys/mutex2.h>
61 #include <sys/mplock2.h>
63 #include <bus/cam/cam.h>
64 #include <bus/cam/cam_ccb.h>
66 #include <dev/disk/iscsi/initiator/iscsi.h>
67 #include <dev/disk/iscsi/initiator/iscsivar.h>
/*
 * File-local counter. In the visible part of this listing it is only read
 * (printed by two level-3 debug traces); no visible line modifies it.
 */
75 static int ou_refcnt = 0;
/*
 * Fragment of the callback that adds a reference to mbuf external storage.
 * The function header is not visible in this listing.
 * NOTE(review): presumably this is the ext_ref routine that the mbuf
 * variant of isc_sendPDU stores in m_ext.ext_ref -- confirm.
 */
78 | function for counting refs on external storage for mbuf
// level-3 trace of the global counter, the argument and its data buffer
85 debug(3, "ou_refcnt=%d arg=%p b=%p", ou_refcnt, a, a->buf);
// atomically add one to the reference count kept in the argument
86 atomic_add_int(&a->refcnt, 1);
/*
 * Fragment of the callback that drops a reference to mbuf external storage.
 * The function header and the braces of the if body are not visible in this
 * listing.
 * NOTE(review): presumably this is the ext_free routine that the mbuf
 * variant of isc_sendPDU stores in m_ext.ext_free -- confirm.
 */
90 | function for freeing external storage for mbuf
// atomically subtract one; the test compares the value returned by
// atomic_fetchadd_int with 1 (NOTE(review): that is the value before the
// subtraction, i.e. this was the last reference -- confirm)
97 if (atomic_fetchadd_int(&a->refcnt, -1) == 1)
99 debug(3, "ou_refcnt=%d a=%p b=%p", ou_refcnt, a, a->buf);
// release the data buffer back to the M_ISCSI malloc type
100 kfree(a->buf, M_ISCSI);
/*
 * isc_sendPDU, mbuf variant (the alternative to the uio variant that follows
 * the preprocessor else branch).
 *
 * Builds an mbuf chain for the PDU held in pq and hands it to sosend() on
 * the session socket sp->soc:
 *   - a packet-header mbuf carrying the iSCSI header (union ipdu_u),
 *     any AHS and the header digest,
 *   - mbufs that point at the data segment through external storage,
 *   - an mbuf carrying the data digest.
 * Several lines (return type, some declarations, the conditions guarding
 * the AHS and digest sections, closing braces, the return) are not visible
 * in this listing.
 *
 * Fix in this revision: the data-digest mbuf me used to be prepared with
 * MH_ALIGN(mh, sizeof(int)), which moved the data pointer of the already
 * filled header mbuf a second time and left me untouched. me comes from
 * MGET (no packet header), so it is now aligned with M_ALIGN(me, ...).
 */
106 isc_sendPDU(isc_session_t *sp, pduq_t *pq)
108 struct mbuf *mh, **mp;
109 pdu_t *pp = &pq->pdu;
114 | mbuf for the iSCSI header
// NOTE(review): no NULL check after the MB_TRYWAIT allocations is visible
// in this listing -- confirm against the full file
116 MGETHDR(mh, MB_TRYWAIT, MT_DATA);
117 mh->m_len = mh->m_pkthdr.len = sizeof(union ipdu_u);
118 mh->m_pkthdr.rcvif = NULL;
119 MH_ALIGN(mh, sizeof(union ipdu_u));
120 bcopy(&pp->ipdu, mh->m_data, sizeof(union ipdu_u));
// header digest is seeded with 0 and computed over the iSCSI header
124 pq->pdu.hdr_dig = sp->hdrDigest(&pp->ipdu, sizeof(union ipdu_u), 0);
127 | Add any AHS to the iSCSI hdr mbuf
128 | XXX Assert: (mh->m_pkthdr.len + pp->ahs_len) < MHLEN
130 bcopy(pp->ahs, (mh->m_data + mh->m_len), pp->ahs_len);
131 mh->m_len += pp->ahs_len;
132 mh->m_pkthdr.len += pp->ahs_len;
// the digest over the AHS continues from the digest of the header
135 pq->pdu.hdr_dig = sp->hdrDigest(&pp->ahs, pp->ahs_len, pq->pdu.hdr_dig);
138 debug(2, "hdr_dig=%x", pq->pdu.hdr_dig);
140 | Add header digest to the iSCSI hdr mbuf
141 | XXX Assert: (mh->m_pkthdr.len + 4) < MHLEN
143 bcopy(&pp->hdr_dig, (mh->m_data + mh->m_len), sizeof(int));
144 mh->m_len += sizeof(int);
145 mh->m_pkthdr.len += sizeof(int);
// the loop body that adjusts len is not visible in this listing
153 while(len & 03) // the specs say it must be int aligned
158 MGET(md, MB_TRYWAIT, MT_DATA);
// each data mbuf covers at most MCLBYTES of the data segment
161 l = min(MCLBYTES, len);
162 debug(5, "setting ext_free(arg=%p len/l=%d/%d)", pq->buf, len, l);
// describe pq->buf as external storage; pq is the callback argument
163 md->m_ext.ext_buf = pq->buf;
164 md->m_ext.ext_free = ext_free;
165 md->m_ext.ext_ref = ext_ref;
166 md->m_ext.ext_arg = pq;
167 md->m_ext.ext_size = l;
168 md->m_flags |= M_EXT;
169 md->m_data = pp->ds + off;
172 mh->m_pkthdr.len += l;
// data digest is seeded with 0 and computed over the whole data segment
182 pp->ds_dig = sp->dataDigest(pp->ds, pp->ds_len, 0);
184 MGET(me, MB_TRYWAIT, MT_DATA);
185 me->m_len = sizeof(int);
// align the digest mbuf itself; the header mbuf mh must not be moved again
186 M_ALIGN(me, sizeof(int));
187 bcopy(&pp->ds_dig, me->m_data, sizeof(int));
189 mh->m_pkthdr.len += sizeof(int);
// hand the finished chain to the socket layer; a nonzero result is traced
192 if((error = sosend(sp->soc, NULL, NULL, mh, 0, 0, curthread)) != 0) {
193 sdebug(3, "error=%d", error);
// record the uptime of this send in the session statistics
197 getmicrouptime(&sp->stats.t_sent);
200 #else /* NO_USE_MBUF */
/*
 * isc_sendPDU, uio variant (compiled when the mbuf variant is not).
 *
 * Describes the PDU held in pq with the iovec array pq->iov (iSCSI header,
 * AHS, header digest, data segment, data digest) and writes it to the
 * session socket sp->soc with sosend(). After a partial send the iovec
 * array is adjusted and the send is repeated while uio_resid is nonzero.
 * Several lines (return type, some declarations, the conditions guarding
 * the optional sections, the iv increments, closing braces, the return)
 * are not visible in this listing.
 */
202 isc_sendPDU(isc_session_t *sp, pduq_t *pq)
204 struct uio *uio = &pq->uio;
206 pdu_t *pp = &pq->pdu;
// start from a clean uio describing a kernel-space write by this thread
211 bzero(uio, sizeof(struct uio));
212 uio->uio_rw = UIO_WRITE;
213 uio->uio_segflg = UIO_SYSSPACE;
214 uio->uio_td = curthread;
215 uio->uio_iov = iv = pq->iov;
// first segment: the iSCSI header
217 iv->iov_base = &pp->ipdu;
218 iv->iov_len = sizeof(union ipdu_u);
// total number of bytes to send is taken from pq->len
219 uio->uio_resid = pq->len;
// header digest is seeded with 0 and computed over the iSCSI header
222 pq->pdu.hdr_dig = sp->hdrDigest(&pp->ipdu, sizeof(union ipdu_u), 0);
// segment for the additional header segment (AHS)
224 iv->iov_base = pp->ahs;
225 iv->iov_len = pp->ahs_len;
// the digest over the AHS continues from the digest of the header
229 pq->pdu.hdr_dig = sp->hdrDigest(&pp->ahs, pp->ahs_len, pq->pdu.hdr_dig);
232 debug(2, "hdr_dig=%x", pq->pdu.hdr_dig);
// segment for the header digest
233 iv->iov_base = &pp->hdr_dig;
234 iv->iov_len = sizeof(int);
// segment for the data segment
238 iv->iov_base = pp->ds;
239 iv->iov_len = pp->ds_len;
// the loop body that adjusts iov_len is not visible in this listing
240 while(iv->iov_len & 03) // the specs say it must be int aligned
// data digest is seeded with 0 and computed over the whole data segment
245 pp->ds_dig = sp->dataDigest(pp->ds, pp->ds_len, 0);
246 iv->iov_base = &pp->ds_dig;
247 iv->iov_len = sizeof(int);
// number of segments filled in so far
250 uio->uio_iovcnt = iv - pq->iov;
251 sdebug(5, "opcode=%x iovcnt=%d uio_resid=%d itt=%x",
252 pp->ipdu.bhs.opcode, uio->uio_iovcnt, uio->uio_resid,
253 ntohl(pp->ipdu.bhs.itt));
254 sdebug(5, "sp=%p sp->soc=%p uio=%p sp->td=%p",
255 sp, sp->soc, uio, sp->td);
// remember the residual before the send so progress can be measured
257 len = uio->uio_resid;
258 error = sosend(sp->soc, NULL, uio, 0, 0, 0, curthread);
// finished, failed, or nothing at all was sent
259 if(uio->uio_resid == 0 || error || len == uio->uio_resid) {
261 sdebug(2, "uio->uio_resid=%d uio->uio_iovcnt=%d error=%d len=%d",
262 uio->uio_resid, uio->uio_iovcnt, error, len);
264 error = EAGAIN; // 35
271 sdebug(1, "uio->uio_resid=%d uio->uio_iovcnt=%d",
272 uio->uio_resid, uio->uio_iovcnt);
// len becomes the number of bytes that were actually sent
274 len -= uio->uio_resid;
// walk the segments to find where the partial send stopped
275 while(uio->uio_iovcnt > 0) {
276 if(iv->iov_len > len) {
277 caddr_t bp = (caddr_t)iv->iov_base;
// resume inside this segment, len bytes past its old start
280 iv->iov_base = (void *)&bp[len];
288 } while(uio->uio_resid);
// record the uptime of this send in the session statistics
292 getmicrouptime(&sp->stats.t_sent);
298 #endif /* USE_MBUF */
301 | wait till a PDU header is received
305 The format of the BHS is:
307 Byte/ 0 | 1 | 2 | 3 |
309 |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|
310 +---------------+---------------+---------------+---------------+
311 0|.|I| Opcode |F| Opcode-specific fields |
312 +---------------+---------------+---------------+---------------+
313 4|TotalAHSLength | DataSegmentLength |
314 +---------------+---------------+---------------+---------------+
315 8| LUN or Opcode-specific fields |
318 +---------------+---------------+---------------+---------------+
319 16| Initiator Task Tag |
320 +---------------+---------------+---------------+---------------+
321 20/ Opcode-specific fields /
323 +---------------+---------------+---------------+---------------+
/*
 * so_getbhs: read one basic header segment (bhs_t) from the session socket
 * into sp->bhs, using the uio and iovec embedded in the session.
 * A receive that reports no error but leaves bytes outstanding is turned
 * into EPIPE. Several lines (return type, remaining uio and iov setup,
 * declarations, closing braces, the return) are not visible in this listing.
 */
327 so_getbhs(isc_session_t *sp)
329 bhs_t *bhs = &sp->bhs;
330 struct uio *uio = &sp->uio;
331 struct iovec *iov = &sp->iov;
// exactly one header is requested
337 iov->iov_len = sizeof(bhs_t);
341 uio->uio_rw = UIO_READ;
342 uio->uio_segflg = UIO_SYSSPACE;
343 uio->uio_td = curthread; // why ...
344 uio->uio_resid = sizeof(bhs_t);
347 error = so_pru_soreceive(sp->soc, NULL, uio, NULL, NULL, &flags);
350 debug(2, "error=%d so_error=%d uio->uio_resid=%zd iov.iov_len=%zd",
352 sp->soc->so_error, uio->uio_resid, iov->iov_len);
// short read without an error code: report it as EPIPE
353 if(!error && (uio->uio_resid > 0)) {
354 error = EPIPE; // was EAGAIN
355 debug(2, "error=%d so_error=%d uio->uio_resid=%zd iov.iov_len=%zd "
358 sp->soc->so_error, uio->uio_resid, iov->iov_len,
366 | so_recv gets called when there is at least
367 | an iSCSI header in the queue
/*
 * so_recv: receive the remainder of a PDU whose header is already in pq.
 *
 * Visible steps:
 *   - derive the AHS and data segment lengths from the header,
 *   - reject a length above sp->opt.maxRecvDataSegmentLength (when that
 *     option is positive),
 *   - for ISCSI_READ_DATA, look up the originating request by its task tag
 *     and, when it is an ISCSI_SCSI_CMD whose edtlen covers the data, read
 *     straight into the CAM data buffer at the offset given by the PDU,
 *   - otherwise receive into an mbuf chain through a sockbuf,
 *   - validate and update the command window (sn->maxCmd, sn->expCmd,
 *     sp->cws),
 *   - on error trace it and free the PDU.
 * Many lines (return type, declarations, parts of conditions, closing
 * braces, returns) are not visible in this listing.
 */
370 so_recv(isc_session_t *sp, pduq_t *pq)
372 struct socket *so = sp->soc;
374 struct uio *uio = &pq->uio;
384 | now calculate how much data should be in the buffer
385 | NOTE: digest is not verified/calculated - yet
// the header field AHSLength is multiplied by 4 to get ahs_len
393 pp->ahs_len = bhs->AHSLength * 4;
// on little-endian hosts the bytes of n are rearranged; the visible terms
// move bits 16-23 down to bits 0-7 and bits 0-7 up to bits 16-23
// (one term of the expression is not visible in this listing)
400 #if BYTE_ORDER == LITTLE_ENDIAN
401 pp->ds_len = ((n & 0x00ff0000) >> 16)
403 | ((n & 0x000000ff) << 16);
// the limit is only enforced when the option is positive
414 if((sp->opt.maxRecvDataSegmentLength > 0) && (len > sp->opt.maxRecvDataSegmentLength)) {
// NOTE(review): len is printed with %d here and with %ld a few lines below;
// the declaration of len is not visible, one of the two formats is
// presumably wrong -- confirm
416 xdebug("impossible PDU length(%d) opt.maxRecvDataSegmentLength=%d",
417 len, sp->opt.maxRecvDataSegmentLength);
418 // deep trouble here, probably all we can do is
419 // force a disconnect, XXX: check RFC ...
421 "so_recv: impossible PDU length(%ld) from iSCSI %s/%s\n",
422 len, sp->opt.targetAddress, sp->opt.targetName);
425 | XXX: this will really screwup the stream.
426 | should clear up the buffer till a valid header
427 | is found, or just close connection ...
428 | should read the RFC.
440 uio->uio_td = curthread; // why ...
442 // it's more efficient to use mbufs -- why?
443 if(bhs->opcode == ISCSI_READ_DATA) {
// find the queued request that carries the same initiator task tag
446 opq = i_search_hld(sp, pq->pdu.ipdu.bhs.itt, 1);
448 union ccb *ccb = opq->ccb;
449 struct ccb_scsiio *csio = &ccb->csio;
450 pdu_t *opp = &opq->pdu;
451 scsi_req_t *cmd = &opp->ipdu.scsi_req;
452 data_in_t *rcmd = &pq->pdu.ipdu.data_in;
453 bhs_t *bhp = &opp->ipdu.bhs;
// direct read only when the request was a SCSI command and its expected
// transfer length is at least the incoming data length
// (one term of the condition is not visible in this listing)
456 if(bhp->opcode == ISCSI_SCSI_CMD
458 && (ntohl(cmd->edtlen) >= pq->pdu.ds_len)) {
459 struct iovec *iov = pq->iov;
// destination is the CAM data buffer plus the buffer offset from the PDU
460 iov->iov_base = csio->data_ptr + ntohl(rcmd->bo);
461 iov->iov_len = pq->pdu.ds_len;
463 uio->uio_rw = UIO_READ;
464 uio->uio_segflg = UIO_SYSSPACE;
// bytes beyond the data segment go to a second iovec that points at r
// NOTE(review): presumably padding and/or digest bytes -- confirm
467 if(len > pq->pdu.ds_len) {
468 pq->iov[1].iov_base = &r;
469 pq->iov[1].iov_len = len - pq->pdu.ds_len;
474 sdebug(4, "uio_resid=0x%zx itt=0x%x bp=%p bo=%x len=%x/%x",
476 ntohl(pq->pdu.ipdu.bhs.itt),
477 csio->data_ptr, ntohl(rcmd->bo), ntohl(cmd->edtlen), pq->pdu.ds_len);
// NOTE(review): the text below says so_pru_receive, the call that follows
// is so_pru_soreceive
483 * Here we call so_pru_receive with a sockbuf so we can obtain
484 * the mbuf chain that can be assigned later to the pq->mp,
485 * which is the mbuf wanted.
486 * For the moment, resid will be saved in the uio.
489 error = so_pru_soreceive(so, NULL, NULL, &sbp, NULL, &flags);
// outstanding bytes = sockbuf limit minus the bytes it now holds
491 uio->uio_resid = sbp.sb_climit - sbp.sb_cc;
492 //if(error == EAGAIN)
493 // XXX: this needs work! it hangs iscontrol
494 if(error || uio->uio_resid)
498 sdebug(6, "len=%d] opcode=0x%x ahs_len=0x%x ds_len=0x%x",
499 pq->len, bhs->opcode, pp->ahs_len, pp->ds_len);
// command window bounds as sent by the target, in host byte order
// (exp is loaded from the ExpStSN header field)
501 max = ntohl(bhs->MaxCmdSN);
502 exp = ntohl(bhs->ExpStSN);
// the first part of this window sanity check is not visible in this listing
505 max > exp - _MAXINCR) {
506 sdebug(2, "bad cmd window size");
507 error = EIO; // XXX: for now;
// the statements guarded by the two comparisons are not visible in this
// listing
511 if(SNA_GT(max, sn->maxCmd))
514 if(SNA_GT(exp, sn->expCmd))
// command window size: inclusive distance between expCmd and maxCmd
517 sp->cws = sn->maxCmd - sn->expCmd + 1;
522 // XXX: need some work here
523 xdebug("have a problem, error=%d", error);
524 pdu_free(sp->isc, pq);
// the statement guarded by this condition is not visible in this listing
525 if(!error && uio->uio_resid > 0)
531 | wait for something to arrive.
532 | and if the pdu is without errors, process it.
/*
 * so_input: read one PDU from the session socket.
 *
 * Reads the basic header with so_getbhs(), allocates a pduq_t, copies the
 * header into it and lets so_recv() receive the rest. The receive time is
 * stored in the session statistics. Several lines (return type,
 * declarations, the error checks between the steps, closing braces, the
 * return) are not visible in this listing.
 *
 * Fix in this revision: the fallback allocation used M_NOWAIT although its
 * comment says waiting is acceptable, so it could return NULL again and the
 * pointer is dereferenced right after. It now uses M_WAITOK so the retry
 * blocks until a PDU is available.
 */
535 so_input(isc_session_t *sp)
542 | first read in the iSCSI header
544 error = so_getbhs(sp);
// first attempt must not sleep
549 pq = pdu_alloc(sp->isc, M_NOWAIT);
550 if(pq == NULL) { // XXX: might cause a deadlock ...
551 debug(3, "out of pdus, wait");
// second attempt may sleep, so pq is valid for the dereference below
552 pq = pdu_alloc(sp->isc, M_WAITOK); // OK to WAIT
// hand the header that so_getbhs stored in the session over to the PDU
554 pq->pdu.ipdu.bhs = sp->bhs;
555 pq->len = sizeof(bhs_t); // so far only the header was read
556 error = so_recv(sp, pq);
// the condition guarding this adjustment is not visible in this listing
558 error += 0x800; // XXX: just to see the error.
560 // XXX: close connection and exit
// record the uptime of this receive in the session statistics
564 getmicrouptime(&sp->stats.t_recv);
572 | one per active (connected) session.
573 | this thread is responsible for reading
574 | in packets from the target.
/*
 * Body of the per-session receiver thread. The function header is not
 * visible in this listing.
 * NOTE(review): presumably this is isc_soc, the routine passed to
 * kthread_create by isc_start_receiver, with vp as its argument -- confirm.
 *
 * Visible behaviour: while both ISC_CON_RUN and ISC_LINK_UP are set it
 * calls so_input() and reacts to the result; after the loop it optionally
 * signals sp->proc and finally clears ISC_CON_RUNNING and ISC_LINK_UP.
 * Several lines (declarations, statements inside the branches, closing
 * braces, the thread exit) are not visible in this listing.
 */
579 isc_session_t *sp = (isc_session_t *)vp;
580 struct socket *so = sp->soc;
// keep running only while both flags are still set
591 while((sp->flags & (ISC_CON_RUN | ISC_LINK_UP)) == (ISC_CON_RUN | ISC_LINK_UP)) {
// socket gone or no longer connected
593 if(sp->soc == NULL || !(so->so_state & SS_ISCONNECTED)) {
594 debug(2, "sp->soc=%p", sp->soc);
597 error = so_input(sp);
// the condition that selects this branch is not visible in this listing
599 iscsi_lock_ex(&sp->io_mtx);
600 if(sp->flags & ISC_OWAITING) {
603 iscsi_unlock_ex(&sp->io_mtx);
604 } else if(error == EPIPE) {
607 else if(error == EAGAIN) {
// still connected: sleep for up to 2*hz ticks on sp
608 if(so->so_state & SS_ISCONNECTED)
609 // there seems to be a problem in 6.0 ...
610 tsleep(sp, 0, "iscsoc", 2*hz);
613 sdebug(2, "terminated, flags=%x so_state=%x error=%d proc=%p",
614 sp->flags, so ? so->so_state : 0, error, sp->proc);
// notify the owning process when one is registered with a signal number
615 if((sp->proc != NULL) && sp->signal) {
617 ksignal(sp->proc, sp->signal);
618 PROC_UNLOCK(sp->proc);
619 sp->flags |= ISC_SIGNALED;
620 sdebug(2, "pid=%d signaled(%d)", sp->proc->p_pid, sp->signal);
623 // we have to do something ourselves
624 // like closing this session ...
627 | we've been terminated
629 // do we need this mutex ...?
630 //iscsi_lock_ex(&sp->io_mtx);
// announce that the receiver is gone; done without holding io_mtx here
631 sp->flags &= ~(ISC_CON_RUNNING | ISC_LINK_UP);
633 //iscsi_unlock_ex(&sp->io_mtx);
635 sdebug(2, "dropped ISC_CON_RUNNING");
/*
 * isc_stop_receiver: ask the receiver of session sp to stop and wait for it.
 *
 * Visible steps: clear ISC_LINK_UP and, if the receiver is still marked
 * running, sleep up to 5*hz ticks; shut down the read side of the socket;
 * clear ISC_CON_RUN and sleep in hz-tick steps until ISC_CON_RUNNING is
 * cleared; then handle sp->fp when it is set. Flag changes and sleeps are
 * done with sp->io_mtx held. Several lines (return type, closing braces,
 * the statements guarded by the fp test) are not visible in this listing.
 */
641 isc_stop_receiver(isc_session_t *sp)
// NOTE(review): this trace tolerates a NULL sp, the lines that follow
// dereference sp unconditionally -- confirm whether NULL can reach here
644 debug(3, "sp=%p sp->sid=%d sp->soc=%p", sp, sp ? sp->sid : 0,
645 sp ? sp->soc : NULL);
646 iscsi_lock_ex(&sp->io_mtx);
647 sp->flags &= ~ISC_LINK_UP;
// give a running receiver up to 5*hz ticks to notice the cleared flag
648 if (sp->flags & ISC_CON_RUNNING) {
649 issleep(&sp->soc, &sp->io_mtx, 0, "iscstpc", 5*hz);
651 iscsi_unlock_ex(&sp->io_mtx);
// shut down the receive side of the session socket
654 soshutdown(sp->soc, SHUT_RD);
656 iscsi_lock_ex(&sp->io_mtx);
657 sdebug(3, "soshutdown");
658 sp->flags &= ~ISC_CON_RUN;
// poll in hz-tick sleeps until the receiver clears ISC_CON_RUNNING
659 while(sp->flags & ISC_CON_RUNNING) {
660 sdebug(3, "waiting flags=%x", sp->flags);
661 issleep(&sp->soc, &sp->io_mtx, 0, "iscstpc", hz);
663 iscsi_unlock_ex(&sp->io_mtx);
665 if (sp->fp != NULL) {
669 /* sofree(sp->soc); fp deals with socket termination */
/*
 * isc_start_receiver: mark session sp as running and linked up, then create
 * the kernel thread that runs isc_soc with sp as its argument. The thread
 * handle is stored in sp->soc_thr and the thread is named after sp->sid.
 * The return type, braces and any return statement are not visible in this
 * listing; the result of kthread_create is not used on the visible line.
 */
676 isc_start_receiver(isc_session_t *sp)
// flags are set before the thread exists, so its loop test sees them
680 sp->flags |= ISC_CON_RUN | ISC_LINK_UP;
681 sp->flags |= ISC_CON_RUNNING;
683 kthread_create(isc_soc, sp, &sp->soc_thr, "iscsi%d", sp->sid);