2 * httpread - Manage reading file(s) from HTTP/TCP socket
4 * Copyright 2008 Atheros Communications
6 * This software may be distributed under the terms of the BSD license.
7 * See README for more details.
9 * The files are buffered via internal callbacks from eloop, then presented to
10 * an application callback routine when completely read into memory. May also
11 * be used if no file is expected but just to get the header, including HTTP
12 * replies (e.g. HTTP/1.1 200 OK etc.).
14 * This does not attempt to be an optimally efficient implementation, but does
15 * attempt to be of reasonably small size and memory consumption; assuming that
16 * only small files are to be read. A maximum file size is provided by
17 * application and enforced.
19 * It is assumed that the application does not expect any of the following:
20 * -- transfer encoding other than chunked
22 * It is assumed that, even if the other side requested that the connection be
23 * kept open, we will close it (thus HTTP messages sent by the application
24 * should have the connection closed field); this is allowed by HTTP/1.1 and
25 * simplifies things for us.
28 * -- HTTP header may not exceed a hard-coded size.
31 * This code would be massively simpler without some of the new features of
32 * HTTP/1.1, especially chunked data.
42 /* Tunable parameters */
43 #define HTTPREAD_READBUF_SIZE 1024 /* read in chunks of this size */
44 #define HTTPREAD_HEADER_MAX_SIZE 4096 /* max allowed for headers */
45 #define HTTPREAD_BODYBUF_DELTA 4096 /* increase allocation by this */
48 /* httpread_debug -- set this global variable > 0 e.g. from debugger
49 * to enable debugs (larger numbers for more debugs)
50 * Make this a #define of 0 to eliminate the debugging code.
52 int httpread_debug = 99;
54 #define httpread_debug 0 /* eliminates even the debugging code */
58 /* control instance -- actual definition (opaque to application)
61 /* information from creation */
62 int sd; /* descriptor of TCP socket to read from */
63 void (*cb)(struct httpread *handle, void *cookie,
64 enum httpread_event e); /* call on event */
65 void *cookie; /* pass to callback */
66 int max_bytes; /* maximum file size else abort it */
67 int timeout_seconds; /* 0 or total duration timeout period */
69 /* dynamically used information follows */
71 int got_hdr; /* nonzero when header is finalized */
72 char hdr[HTTPREAD_HEADER_MAX_SIZE+1]; /* headers stored here */
75 enum httpread_hdr_type hdr_type;
76 int version; /* 1 if we've seen 1.1 */
77 int reply_code; /* for type REPLY, e.g. 200 for HTTP/1.1 200 OK */
78 int got_content_length; /* true if we know content length for sure */
79 int content_length; /* body length, iff got_content_length */
80 int chunked; /* nonzero for chunked data */
83 int got_body; /* nonzero when body is finalized */
86 int body_alloc_nbytes; /* amount allocated */
88 int got_file; /* here when we are done */
90 /* The following apply if data is chunked: */
91 int in_chunk_data; /* 0=in/at header, 1=in the data or tail*/
92 int chunk_start; /* offset in body of chunk hdr or data */
93 int chunk_size; /* data of chunk (not hdr or ending CRLF)*/
94 int in_trailer; /* in header fields after data (chunked only)*/
96 trailer_line_begin = 0,
97 trailer_empty_cr, /* empty line + CR */
104 /* Check words for equality, where words consist of graphical characters
105 * delimited by whitespace
106 * Returns nonzero if "equal" doing case insensitive comparison.
108 static int word_eq(char *s1, char *s2)
117 if (isalpha(c1) && isupper(c1))
119 if (isalpha(c2) && isupper(c2))
123 if (end1 || end2 || c1 != c2)
126 return end1 && end2; /* reached end of both words? */
130 static void httpread_timeout_handler(void *eloop_data, void *user_ctx);
132 /* httpread_destroy -- if h is non-NULL, clean up
133 * This must eventually be called by the application following
134 * call of the application's callback and may be called
135 * earlier if desired.
137 void httpread_destroy(struct httpread *h)
139 if (httpread_debug >= 10)
140 wpa_printf(MSG_DEBUG, "ENTER httpread_destroy(%p)", h);
144 eloop_cancel_timeout(httpread_timeout_handler, NULL, h);
145 eloop_unregister_sock(h->sd, EVENT_TYPE_READ);
148 os_memset(h, 0, sizeof(*h)); /* aid debugging */
149 h->sd = -1; /* aid debugging */
154 /* httpread_timeout_handler -- called on excessive total duration
156 static void httpread_timeout_handler(void *eloop_data, void *user_ctx)
158 struct httpread *h = user_ctx;
159 wpa_printf(MSG_DEBUG, "httpread timeout (%p)", h);
160 (*h->cb)(h, h->cookie, HTTPREAD_EVENT_TIMEOUT);
164 /* Analyze options only so far as is needed to correctly obtain the file.
165 * The application can look at the raw header to find other options.
167 static int httpread_hdr_option_analyze(
169 char *hbp /* pointer to current line in header buffer */
172 if (word_eq(hbp, "CONTENT-LENGTH:")) {
173 while (isgraph(*hbp))
175 while (*hbp == ' ' || *hbp == '\t')
179 h->content_length = atol(hbp);
180 h->got_content_length = 1;
183 if (word_eq(hbp, "TRANSFER_ENCODING:") ||
184 word_eq(hbp, "TRANSFER-ENCODING:")) {
185 while (isgraph(*hbp))
187 while (*hbp == ' ' || *hbp == '\t')
189 /* There should (?) be no encodings of interest
190 * other than chunked...
192 if (word_eq(hbp, "CHUNKED")) {
194 h->in_chunk_data = 0;
195 /* ignore possible ;<parameters> */
199 /* skip anything we don't know, which is a lot */
204 static int httpread_hdr_analyze(struct httpread *h)
206 char *hbp = h->hdr; /* pointer into h->hdr */
207 int standard_first_line = 1;
209 /* First line is special */
210 h->hdr_type = HTTPREAD_HDR_TYPE_UNKNOWN;
213 if (os_strncmp(hbp, "HTTP/", 5) == 0) {
214 h->hdr_type = HTTPREAD_HDR_TYPE_REPLY;
215 standard_first_line = 0;
217 if (hbp[0] == '1' && hbp[1] == '.' &&
218 isdigit(hbp[2]) && hbp[2] != '0')
220 while (isgraph(*hbp))
222 while (*hbp == ' ' || *hbp == '\t')
226 h->reply_code = atol(hbp);
227 } else if (word_eq(hbp, "GET"))
228 h->hdr_type = HTTPREAD_HDR_TYPE_GET;
229 else if (word_eq(hbp, "HEAD"))
230 h->hdr_type = HTTPREAD_HDR_TYPE_HEAD;
231 else if (word_eq(hbp, "POST"))
232 h->hdr_type = HTTPREAD_HDR_TYPE_POST;
233 else if (word_eq(hbp, "PUT"))
234 h->hdr_type = HTTPREAD_HDR_TYPE_PUT;
235 else if (word_eq(hbp, "DELETE"))
236 h->hdr_type = HTTPREAD_HDR_TYPE_DELETE;
237 else if (word_eq(hbp, "TRACE"))
238 h->hdr_type = HTTPREAD_HDR_TYPE_TRACE;
239 else if (word_eq(hbp, "CONNECT"))
240 h->hdr_type = HTTPREAD_HDR_TYPE_CONNECT;
241 else if (word_eq(hbp, "NOTIFY"))
242 h->hdr_type = HTTPREAD_HDR_TYPE_NOTIFY;
243 else if (word_eq(hbp, "M-SEARCH"))
244 h->hdr_type = HTTPREAD_HDR_TYPE_M_SEARCH;
245 else if (word_eq(hbp, "M-POST"))
246 h->hdr_type = HTTPREAD_HDR_TYPE_M_POST;
247 else if (word_eq(hbp, "SUBSCRIBE"))
248 h->hdr_type = HTTPREAD_HDR_TYPE_SUBSCRIBE;
249 else if (word_eq(hbp, "UNSUBSCRIBE"))
250 h->hdr_type = HTTPREAD_HDR_TYPE_UNSUBSCRIBE;
254 if (standard_first_line) {
258 while (isgraph(*hbp))
260 while (*hbp == ' ' || *hbp == '\t')
263 * Find length, allocate memory for translated
264 * copy, then translate by changing %<hex><hex>
265 * into represented value.
268 while (isgraph(*hbp))
270 h->uri = os_malloc((hbp - rawuri) + 1);
274 while (rawuri < hbp) {
277 isxdigit(rawuri[1]) && isxdigit(rawuri[2])) {
278 *uri++ = hex2byte(rawuri + 1);
285 *uri = 0; /* null terminate */
286 while (isgraph(*hbp))
288 while (*hbp == ' ' || *hbp == '\t')
291 if (0 == strncmp(hbp, "HTTP/", 5)) {
293 if (hbp[0] == '1' && hbp[1] == '.' &&
294 isdigit(hbp[2]) && hbp[2] != '0')
298 /* skip rest of line */
303 /* Remainder of lines are options, in any order;
304 * or empty line to terminate
307 /* Empty line to terminate */
308 if (hbp[0] == '\n' ||
309 (hbp[0] == '\r' && hbp[1] == '\n'))
313 if (httpread_hdr_option_analyze(h, hbp))
321 /* chunked overrides content-length always */
323 h->got_content_length = 0;
325 /* For some types, we should not try to read a body
326 * This is in addition to the application determining
327 * that we should not read a body.
329 switch (h->hdr_type) {
330 case HTTPREAD_HDR_TYPE_REPLY:
331 /* Some codes can have a body and some not.
332 * For now, just assume that any other than 200
335 if (h->reply_code != 200)
338 case HTTPREAD_HDR_TYPE_GET:
339 case HTTPREAD_HDR_TYPE_HEAD:
340 /* in practice it appears that it is assumed
341 * that GETs have a body length of 0... ?
343 if (h->chunked == 0 && h->got_content_length == 0)
346 case HTTPREAD_HDR_TYPE_POST:
347 case HTTPREAD_HDR_TYPE_PUT:
348 case HTTPREAD_HDR_TYPE_DELETE:
349 case HTTPREAD_HDR_TYPE_TRACE:
350 case HTTPREAD_HDR_TYPE_CONNECT:
351 case HTTPREAD_HDR_TYPE_NOTIFY:
352 case HTTPREAD_HDR_TYPE_M_SEARCH:
353 case HTTPREAD_HDR_TYPE_M_POST:
354 case HTTPREAD_HDR_TYPE_SUBSCRIBE:
355 case HTTPREAD_HDR_TYPE_UNSUBSCRIBE:
368 /* httpread_read_handler -- called when socket ready to read
370 * Note: any extra data we read past end of transmitted file is ignored;
371 * if we were to support keeping connections open for multiple files then
372 * this would have to be addressed.
374 static void httpread_read_handler(int sd, void *eloop_ctx, void *sock_ctx)
376 struct httpread *h = sock_ctx;
378 char *rbp; /* pointer into read buffer */
379 char *hbp; /* pointer into header buffer */
380 char *bbp; /* pointer into body buffer */
381 char readbuf[HTTPREAD_READBUF_SIZE]; /* temp use to read into */
383 if (httpread_debug >= 20)
384 wpa_printf(MSG_DEBUG, "ENTER httpread_read_handler(%p)", h);
386 /* read some at a time, then search for the internal
387 * boundaries between header and data and etc.
389 nread = read(h->sd, readbuf, sizeof(readbuf));
393 /* end of transmission... this may be normal
394 * or may be an error... in some cases we can't
395 * tell which so we must assume it is normal then.
398 /* Must at least have completed header */
399 wpa_printf(MSG_DEBUG, "httpread premature eof(%p)", h);
402 if (h->chunked || h->got_content_length) {
403 /* Premature EOF; e.g. dropped connection */
404 wpa_printf(MSG_DEBUG,
405 "httpread premature eof(%p) %d/%d",
410 /* No explicit length, hopefully we have all the data
411 * although dropped connections can cause false
414 if (httpread_debug >= 10)
415 wpa_printf(MSG_DEBUG, "httpread ok eof(%p)", h);
421 /* Header consists of text lines (terminated by both CR and LF)
422 * and an empty line (CR LF only).
425 hbp = h->hdr + h->hdr_nbytes;
426 /* add to headers until:
427 * -- we run out of data in read buffer
428 * -- or, we run out of header buffer room
429 * -- or, we get double CRLF in headers
434 if (h->hdr_nbytes == HTTPREAD_HEADER_MAX_SIZE) {
440 if (h->hdr_nbytes >= 4 &&
446 *hbp = 0; /* null terminate */
450 /* here we've just finished reading the header */
451 if (httpread_hdr_analyze(h)) {
452 wpa_printf(MSG_DEBUG, "httpread bad hdr(%p)", h);
455 if (h->max_bytes == 0) {
456 if (httpread_debug >= 10)
457 wpa_printf(MSG_DEBUG,
458 "httpread no body hdr end(%p)", h);
461 if (h->got_content_length && h->content_length == 0) {
462 if (httpread_debug >= 10)
463 wpa_printf(MSG_DEBUG,
464 "httpread zero content length(%p)",
470 /* Certain types of requests never have data and so
471 * must be specially recognized.
473 if (!os_strncasecmp(h->hdr, "SUBSCRIBE", 9) ||
474 !os_strncasecmp(h->hdr, "UNSUBSCRIBE", 11) ||
475 !os_strncasecmp(h->hdr, "HEAD", 4) ||
476 !os_strncasecmp(h->hdr, "GET", 3)) {
478 if (httpread_debug >= 10)
479 wpa_printf(MSG_DEBUG,
480 "httpread NO BODY for sp. type");
486 /* Data can be just plain binary data, or if "chunked"
487 * consists of chunks each with a header, ending with
493 /* Here to get (more of) body */
494 /* ensure we have enough room for worst case for body
495 * plus a null termination character
497 if (h->body_alloc_nbytes < (h->body_nbytes + nread + 1)) {
499 int new_alloc_nbytes;
501 if (h->body_nbytes >= h->max_bytes)
503 new_alloc_nbytes = h->body_alloc_nbytes +
504 HTTPREAD_BODYBUF_DELTA;
505 /* For content-length case, the first time
506 * through we allocate the whole amount
509 if (h->got_content_length &&
510 new_alloc_nbytes < (h->content_length + 1))
511 new_alloc_nbytes = h->content_length + 1;
512 if ((new_body = os_realloc(h->body, new_alloc_nbytes))
517 h->body_alloc_nbytes = new_alloc_nbytes;
520 bbp = h->body + h->body_nbytes;
523 /* See if we need to stop */
524 if (h->chunked && h->in_chunk_data == 0) {
525 /* in chunk header */
526 char *cbp = h->body + h->chunk_start;
527 if (bbp-cbp >= 2 && bbp[-2] == '\r' &&
529 /* end of chunk hdr line */
530 /* hdr line consists solely
531 * of a hex numeral and CRLF
535 h->chunk_size = strtoul(cbp, NULL, 16);
536 /* throw away chunk header
537 * so we have only real data
539 h->body_nbytes = h->chunk_start;
541 if (h->chunk_size == 0) {
542 /* end of chunking */
543 /* trailer follows */
545 if (httpread_debug >= 20)
548 "httpread end chunks(%p)", h);
551 h->in_chunk_data = 1;
552 /* leave chunk_start alone */
554 } else if (h->chunked) {
556 if ((h->body_nbytes - h->chunk_start) ==
557 (h->chunk_size + 2)) {
558 /* end of chunk reached,
561 /* check chunk ended w/ CRLF
562 * which we'll throw away
564 if (bbp[-1] == '\n' &&
570 h->chunk_start = h->body_nbytes;
571 h->in_chunk_data = 0;
572 h->chunk_size = 0; /* just in case */
574 } else if (h->got_content_length &&
575 h->body_nbytes >= h->content_length) {
577 if (httpread_debug >= 10)
580 "httpread got content(%p)", h);
585 /* Now transfer. Optimize using memcpy where we can. */
586 if (h->chunked && h->in_chunk_data) {
587 /* copy up to remainder of chunk data
588 * plus the required CR+LF at end
590 ncopy = (h->chunk_start + h->chunk_size + 2) -
592 } else if (h->chunked) {
593 /*in chunk header -- don't optimize */
598 } else if (h->got_content_length) {
599 ncopy = h->content_length - h->body_nbytes;
603 /* Note: should never be 0 */
606 os_memcpy(bbp, rbp, ncopy);
608 h->body_nbytes += ncopy;
611 } /* body copy loop */
613 if (h->chunked && h->in_trailer) {
614 /* If "chunked" then there is always a trailer,
615 * consisting of zero or more non-empty lines
616 * ending with CR LF and then an empty line w/ CR LF.
617 * We do NOT support trailers except to skip them --
618 * this is supported (generally) by the http spec.
620 bbp = h->body + h->body_nbytes;
627 switch (h->trailer_state) {
628 case trailer_line_begin:
630 h->trailer_state = trailer_empty_cr;
632 h->trailer_state = trailer_nonempty;
634 case trailer_empty_cr:
637 h->trailer_state = trailer_line_begin;
639 if (httpread_debug >= 10)
642 "httpread got content(%p)", h);
646 h->trailer_state = trailer_nonempty;
648 case trailer_nonempty:
650 h->trailer_state = trailer_nonempty_cr;
652 case trailer_nonempty_cr:
654 h->trailer_state = trailer_line_begin;
656 h->trailer_state = trailer_nonempty;
665 wpa_printf(MSG_DEBUG, "httpread read/parse failure (%p)", h);
666 (*h->cb)(h, h->cookie, HTTPREAD_EVENT_ERROR);
673 if (httpread_debug >= 10)
674 wpa_printf(MSG_DEBUG,
675 "httpread got file %d bytes type %d",
676 h->body_nbytes, h->hdr_type);
677 /* Null terminate for convenience of some applications */
679 h->body[h->body_nbytes] = 0; /* null terminate */
681 /* Assume that we do NOT support keeping connection alive,
682 * and just in case somehow we don't get destroyed right away,
685 eloop_unregister_sock(h->sd, EVENT_TYPE_READ);
686 /* The application can destroy us whenever they feel like...
689 eloop_cancel_timeout(httpread_timeout_handler, NULL, h);
690 (*h->cb)(h, h->cookie, HTTPREAD_EVENT_FILE_READY);
694 /* httpread_create -- start a new reading session making use of eloop.
695 * The new instance will use the socket descriptor for reading (until
696 * it gets a file and not after) but will not close the socket, even
697 * when the instance is destroyed (the application must do that).
698 * Return NULL on error.
700 * Provided that httpread_create successfully returns a handle,
701 * the callback fnc is called to handle httpread_event events.
702 * The caller should do destroy on any errors or unknown events.
704 * Pass max_bytes == 0 to not read body at all (required for e.g.
705 * reply to HEAD request).
707 struct httpread * httpread_create(
708 int sd, /* descriptor of TCP socket to read from */
709 void (*cb)(struct httpread *handle, void *cookie,
710 enum httpread_event e), /* call on event */
711 void *cookie, /* pass to callback */
712 int max_bytes, /* maximum body size else abort it */
713 int timeout_seconds /* 0; or total duration timeout period */
716 struct httpread *h = NULL;
718 h = os_zalloc(sizeof(*h));
724 h->max_bytes = max_bytes;
725 h->timeout_seconds = timeout_seconds;
727 if (timeout_seconds > 0 &&
728 eloop_register_timeout(timeout_seconds, 0,
729 httpread_timeout_handler, NULL, h)) {
730 /* No way to recover (from malloc failure) */
733 if (eloop_register_sock(sd, EVENT_TYPE_READ, httpread_read_handler,
735 /* No way to recover (from malloc failure) */
748 /* httpread_hdr_type_get -- When file is ready, returns header type. */
749 enum httpread_hdr_type httpread_hdr_type_get(struct httpread *h)
755 /* httpread_uri_get -- When file is ready, uri_get returns (translated) URI
756 * or possibly NULL (which would be an error).
758 char * httpread_uri_get(struct httpread *h)
764 /* httpread_reply_code_get -- When reply is ready, returns reply code */
765 int httpread_reply_code_get(struct httpread *h)
767 return h->reply_code;
771 /* httpread_length_get -- When file is ready, returns file length. */
772 int httpread_length_get(struct httpread *h)
774 return h->body_nbytes;
778 /* httpread_data_get -- When file is ready, returns file content
779 * with null byte appended.
780 * Returns an empty string (not NULL) if no body buffer is present.
782 void * httpread_data_get(struct httpread *h)
784 return h->body ? h->body : "";
788 /* httpread_hdr_get -- When file is ready, returns header content
789 * with null byte appended.
790 * Might return NULL in some error condition.
792 char * httpread_hdr_get(struct httpread *h)
798 /* httpread_hdr_line_get -- When file is ready, returns pointer
799 * to line within header content matching the given tag
800 * (after the tag itself and any spaces/tabs).
802 * The tag should end with a colon for reliable matching.
804 * If not found, returns NULL;
806 char * httpread_hdr_line_get(struct httpread *h, const char *tag)
808 int tag_len = os_strlen(tag);
810 hdr = os_strchr(hdr, '\n');
815 if (!os_strncasecmp(hdr, tag, tag_len)) {
817 while (*hdr == ' ' || *hdr == '\t')
821 hdr = os_strchr(hdr, '\n');