Initial import from FreeBSD RELENG_4:
[dragonfly.git] / contrib / sendmail / src / mime.c
1 /*
2  * Copyright (c) 1998-2002 Sendmail, Inc. and its suppliers.
3  *      All rights reserved.
4  * Copyright (c) 1994, 1996-1997 Eric P. Allman.  All rights reserved.
5  * Copyright (c) 1994
6  *      The Regents of the University of California.  All rights reserved.
7  *
8  * By using this file, you agree to the terms and conditions set
9  * forth in the LICENSE file which can be found at the top level of
10  * the sendmail distribution.
11  *
12  */
13
14 #include <sendmail.h>
15 #include <string.h>
16
17 SM_RCSID("@(#)$Id: mime.c,v 8.130 2002/05/21 03:39:34 ca Exp $")
18
19 /*
20 **  MIME support.
21 **
22 **      I am indebted to John Beck of Hewlett-Packard, who contributed
23 **      his code to me for inclusion.  As it turns out, I did not use
24 **      his code since he used a "minimum change" approach that used
25 **      several temp files, and I wanted a "minimum impact" approach
26 **      that would avoid copying.  However, looking over his code
27 **      helped me cement my understanding of the problem.
28 **
29 **      I also looked at, but did not directly use, Nathaniel
30 **      Borenstein's "code.c" module.  Again, it functioned as
31 **      a file-to-file translator, which did not fit within my
32 **      design bounds, but it was a useful base for understanding
33 **      the problem.
34 */
35
36 #if MIME8TO7
37 static int      isboundary __P((char *, char **));
38 static int      mimeboundary __P((char *, char **));
39 static int      mime_getchar __P((SM_FILE_T *, char **, int *));
40 static int      mime_getchar_crlf __P((SM_FILE_T *, char **, int *));
41
42 /* character set for hex and base64 encoding */
43 static char     Base16Code[] =  "0123456789ABCDEF";
44 static char     Base64Code[] =  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
45
46 /* types of MIME boundaries */
47 # define MBT_SYNTAX     0       /* syntax error */
48 # define MBT_NOTSEP     1       /* not a boundary */
49 # define MBT_INTERMED   2       /* intermediate boundary (no trailing --) */
50 # define MBT_FINAL      3       /* final boundary (trailing -- included) */
51
52 static char     *MimeBoundaryNames[] =
53 {
54         "SYNTAX",       "NOTSEP",       "INTERMED",     "FINAL"
55 };
56
57 static bool     MapNLtoCRLF;
58
59 /*
60 **  MIME8TO7 -- output 8 bit body in 7 bit format
61 **
62 **      The header has already been output -- this has to do the
63 **      8 to 7 bit conversion.  It would be easy if we didn't have
64 **      to deal with nested formats (multipart/xxx and message/rfc822).
65 **
66 **      We won't be called if we don't have to do a conversion, and
67 **      appropriate MIME-Version: and Content-Type: fields have been
68 **      output.  Any Content-Transfer-Encoding: field has not been
69 **      output, and we can add it here.
70 **
71 **      Parameters:
72 **              mci -- mailer connection information.
73 **              header -- the header for this body part.
74 **              e -- envelope.
75 **              boundaries -- the currently pending message boundaries.
76 **                      NULL if we are processing the outer portion.
77 **              flags -- to tweak processing.
78 **
79 **      Returns:
80 **              An indicator of what terminated the message part:
81 **                MBT_FINAL -- the final boundary
82 **                MBT_INTERMED -- an intermediate boundary
83 **                MBT_NOTSEP -- an end of file
84 */
85
86 struct args
87 {
88         char    *a_field;       /* name of field */
89         char    *a_value;       /* value of that field */
90 };
91
92 int
93 mime8to7(mci, header, e, boundaries, flags)
94         register MCI *mci;
95         HDR *header;
96         register ENVELOPE *e;
97         char **boundaries;
98         int flags;
99 {
100         register char *p;
101         int linelen;
102         int bt;
103         off_t offset;
104         size_t sectionsize, sectionhighbits;
105         int i;
106         char *type;
107         char *subtype;
108         char *cte;
109         char **pvp;
110         int argc = 0;
111         char *bp;
112         bool use_qp = false;
113         struct args argv[MAXMIMEARGS];
114         char bbuf[128];
115         char buf[MAXLINE];
116         char pvpbuf[MAXLINE];
117         extern unsigned char MimeTokenTab[256];
118
119         if (tTd(43, 1))
120         {
121                 sm_dprintf("mime8to7: flags = %x, boundaries =", flags);
122                 if (boundaries[0] == NULL)
123                         sm_dprintf(" <none>");
124                 else
125                 {
126                         for (i = 0; boundaries[i] != NULL; i++)
127                                 sm_dprintf(" %s", boundaries[i]);
128                 }
129                 sm_dprintf("\n");
130         }
131         MapNLtoCRLF = true;
132         p = hvalue("Content-Transfer-Encoding", header);
133         if (p == NULL ||
134             (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
135                            MimeTokenTab)) == NULL ||
136             pvp[0] == NULL)
137         {
138                 cte = NULL;
139         }
140         else
141         {
142                 cataddr(pvp, NULL, buf, sizeof buf, '\0');
143                 cte = sm_rpool_strdup_x(e->e_rpool, buf);
144         }
145
146         type = subtype = NULL;
147         p = hvalue("Content-Type", header);
148         if (p == NULL)
149         {
150                 if (bitset(M87F_DIGEST, flags))
151                         p = "message/rfc822";
152                 else
153                         p = "text/plain";
154         }
155         if (p != NULL &&
156             (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
157                            MimeTokenTab)) != NULL &&
158             pvp[0] != NULL)
159         {
160                 if (tTd(43, 40))
161                 {
162                         for (i = 0; pvp[i] != NULL; i++)
163                                 sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]);
164                 }
165                 type = *pvp++;
166                 if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
167                     *++pvp != NULL)
168                 {
169                         subtype = *pvp++;
170                 }
171
172                 /* break out parameters */
173                 while (*pvp != NULL && argc < MAXMIMEARGS)
174                 {
175                         /* skip to semicolon separator */
176                         while (*pvp != NULL && strcmp(*pvp, ";") != 0)
177                                 pvp++;
178                         if (*pvp++ == NULL || *pvp == NULL)
179                                 break;
180
181                         /* complain about empty values */
182                         if (strcmp(*pvp, ";") == 0)
183                         {
184                                 usrerr("mime8to7: Empty parameter in Content-Type header");
185
186                                 /* avoid bounce loops */
187                                 e->e_flags |= EF_DONT_MIME;
188                                 continue;
189                         }
190
191                         /* extract field name */
192                         argv[argc].a_field = *pvp++;
193
194                         /* see if there is a value */
195                         if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
196                             (*++pvp == NULL || strcmp(*pvp, ";") != 0))
197                         {
198                                 argv[argc].a_value = *pvp;
199                                 argc++;
200                         }
201                 }
202         }
203
204         /* check for disaster cases */
205         if (type == NULL)
206                 type = "-none-";
207         if (subtype == NULL)
208                 subtype = "-none-";
209
210         /* don't propogate some flags more than one level into the message */
211         flags &= ~M87F_DIGEST;
212
213         /*
214         **  Check for cases that can not be encoded.
215         **
216         **      For example, you can't encode certain kinds of types
217         **      or already-encoded messages.  If we find this case,
218         **      just copy it through.
219         */
220
221         (void) sm_snprintf(buf, sizeof buf, "%.100s/%.100s", type, subtype);
222         if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
223                 flags |= M87F_NO8BIT;
224
225 # ifdef USE_B_CLASS
226         if (wordinclass(buf, 'b') || wordinclass(type, 'b'))
227                 MapNLtoCRLF = false;
228 # endif /* USE_B_CLASS */
229         if (wordinclass(buf, 'q') || wordinclass(type, 'q'))
230                 use_qp = true;
231
232         /*
233         **  Multipart requires special processing.
234         **
235         **      Do a recursive descent into the message.
236         */
237
238         if (sm_strcasecmp(type, "multipart") == 0 &&
239             (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags)))
240         {
241
242                 if (sm_strcasecmp(subtype, "digest") == 0)
243                         flags |= M87F_DIGEST;
244
245                 for (i = 0; i < argc; i++)
246                 {
247                         if (sm_strcasecmp(argv[i].a_field, "boundary") == 0)
248                                 break;
249                 }
250                 if (i >= argc || argv[i].a_value == NULL)
251                 {
252                         usrerr("mime8to7: Content-Type: \"%s\": %s boundary",
253                                 i >= argc ? "missing" : "bogus", p);
254                         p = "---";
255
256                         /* avoid bounce loops */
257                         e->e_flags |= EF_DONT_MIME;
258                 }
259                 else
260                 {
261                         p = argv[i].a_value;
262                         stripquotes(p);
263                 }
264                 if (sm_strlcpy(bbuf, p, sizeof bbuf) >= sizeof bbuf)
265                 {
266                         usrerr("mime8to7: multipart boundary \"%s\" too long",
267                                 p);
268
269                         /* avoid bounce loops */
270                         e->e_flags |= EF_DONT_MIME;
271                 }
272
273                 if (tTd(43, 1))
274                         sm_dprintf("mime8to7: multipart boundary \"%s\"\n",
275                                 bbuf);
276                 for (i = 0; i < MAXMIMENESTING; i++)
277                 {
278                         if (boundaries[i] == NULL)
279                                 break;
280                 }
281                 if (i >= MAXMIMENESTING)
282                 {
283                         usrerr("mime8to7: multipart nesting boundary too deep");
284
285                         /* avoid bounce loops */
286                         e->e_flags |= EF_DONT_MIME;
287                 }
288                 else
289                 {
290                         boundaries[i] = bbuf;
291                         boundaries[i + 1] = NULL;
292                 }
293                 mci->mci_flags |= MCIF_INMIME;
294
295                 /* skip the early "comment" prologue */
296                 putline("", mci);
297                 mci->mci_flags &= ~MCIF_INHEADER;
298                 bt = MBT_FINAL;
299                 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
300                         != NULL)
301                 {
302                         bt = mimeboundary(buf, boundaries);
303                         if (bt != MBT_NOTSEP)
304                                 break;
305                         putxline(buf, strlen(buf), mci,
306                                  PXLF_MAPFROM|PXLF_STRIP8BIT);
307                         if (tTd(43, 99))
308                                 sm_dprintf("  ...%s", buf);
309                 }
310                 if (sm_io_eof(e->e_dfp))
311                         bt = MBT_FINAL;
312                 while (bt != MBT_FINAL)
313                 {
314                         auto HDR *hdr = NULL;
315
316                         (void) sm_strlcpyn(buf, sizeof buf, 2, "--", bbuf);
317                         putline(buf, mci);
318                         if (tTd(43, 35))
319                                 sm_dprintf("  ...%s\n", buf);
320                         collect(e->e_dfp, false, &hdr, e);
321                         if (tTd(43, 101))
322                                 putline("+++after collect", mci);
323                         putheader(mci, hdr, e, flags);
324                         if (tTd(43, 101))
325                                 putline("+++after putheader", mci);
326                         bt = mime8to7(mci, hdr, e, boundaries, flags);
327                 }
328                 (void) sm_strlcpyn(buf, sizeof buf, 3, "--", bbuf, "--");
329                 putline(buf, mci);
330                 if (tTd(43, 35))
331                         sm_dprintf("  ...%s\n", buf);
332                 boundaries[i] = NULL;
333                 mci->mci_flags &= ~MCIF_INMIME;
334
335                 /* skip the late "comment" epilogue */
336                 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
337                         != NULL)
338                 {
339                         bt = mimeboundary(buf, boundaries);
340                         if (bt != MBT_NOTSEP)
341                                 break;
342                         putxline(buf, strlen(buf), mci,
343                                  PXLF_MAPFROM|PXLF_STRIP8BIT);
344                         if (tTd(43, 99))
345                                 sm_dprintf("  ...%s", buf);
346                 }
347                 if (sm_io_eof(e->e_dfp))
348                         bt = MBT_FINAL;
349                 if (tTd(43, 3))
350                         sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n",
351                                 MimeBoundaryNames[bt]);
352                 return bt;
353         }
354
355         /*
356         **  Message/xxx types -- recurse exactly once.
357         **
358         **      Class 's' is predefined to have "rfc822" only.
359         */
360
361         if (sm_strcasecmp(type, "message") == 0)
362         {
363                 if (!wordinclass(subtype, 's'))
364                 {
365                         flags |= M87F_NO8BIT;
366                 }
367                 else
368                 {
369                         auto HDR *hdr = NULL;
370
371                         putline("", mci);
372
373                         mci->mci_flags |= MCIF_INMIME;
374                         collect(e->e_dfp, false, &hdr, e);
375                         if (tTd(43, 101))
376                                 putline("+++after collect", mci);
377                         putheader(mci, hdr, e, flags);
378                         if (tTd(43, 101))
379                                 putline("+++after putheader", mci);
380                         if (hvalue("MIME-Version", hdr) == NULL &&
381                             !bitset(M87F_NO8TO7, flags))
382                                 putline("MIME-Version: 1.0", mci);
383                         bt = mime8to7(mci, hdr, e, boundaries, flags);
384                         mci->mci_flags &= ~MCIF_INMIME;
385                         return bt;
386                 }
387         }
388
389         /*
390         **  Non-compound body type
391         **
392         **      Compute the ratio of seven to eight bit characters;
393         **      use that as a heuristic to decide how to do the
394         **      encoding.
395         */
396
397         sectionsize = sectionhighbits = 0;
398         if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags))
399         {
400                 /* remember where we were */
401                 offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT);
402                 if (offset == -1)
403                         syserr("mime8to7: cannot sm_io_tell on %cf%s",
404                                DATAFL_LETTER, e->e_id);
405
406                 /* do a scan of this body type to count character types */
407                 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
408                         != NULL)
409                 {
410                         if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
411                                 break;
412                         for (p = buf; *p != '\0'; p++)
413                         {
414                                 /* count bytes with the high bit set */
415                                 sectionsize++;
416                                 if (bitset(0200, *p))
417                                         sectionhighbits++;
418                         }
419
420                         /*
421                         **  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
422                         **  assume base64.  This heuristic avoids double-reading
423                         **  large graphics or video files.
424                         */
425
426                         if (sectionsize >= 4096 &&
427                             sectionhighbits > sectionsize / 4)
428                                 break;
429                 }
430
431                 /* return to the original offset for processing */
432                 /* XXX use relative seeks to handle >31 bit file sizes? */
433                 if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0)
434                         syserr("mime8to7: cannot sm_io_fseek on %cf%s",
435                                DATAFL_LETTER, e->e_id);
436                 else
437                         sm_io_clearerr(e->e_dfp);
438         }
439
440         /*
441         **  Heuristically determine encoding method.
442         **      If more than 1/8 of the total characters have the
443         **      eighth bit set, use base64; else use quoted-printable.
444         **      However, only encode binary encoded data as base64,
445         **      since otherwise the NL=>CRLF mapping will be a problem.
446         */
447
448         if (tTd(43, 8))
449         {
450                 sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n",
451                         (long) sectionhighbits, (long) sectionsize,
452                         cte == NULL ? "[none]" : cte,
453                         type == NULL ? "[none]" : type,
454                         subtype == NULL ? "[none]" : subtype);
455         }
456         if (cte != NULL && sm_strcasecmp(cte, "binary") == 0)
457                 sectionsize = sectionhighbits;
458         linelen = 0;
459         bp = buf;
460         if (sectionhighbits == 0)
461         {
462                 /* no encoding necessary */
463                 if (cte != NULL &&
464                     bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME,
465                            mci->mci_flags) &&
466                     !bitset(M87F_NO8TO7, flags))
467                 {
468                         /*
469                         **  Skip _unless_ in MIME mode and potentially
470                         **  converting from 8 bit to 7 bit MIME.  See
471                         **  putheader() for the counterpart where the
472                         **  CTE header is skipped in the opposite
473                         **  situation.
474                         */
475
476                         (void) sm_snprintf(buf, sizeof buf,
477                                 "Content-Transfer-Encoding: %.200s", cte);
478                         putline(buf, mci);
479                         if (tTd(43, 36))
480                                 sm_dprintf("  ...%s\n", buf);
481                 }
482                 putline("", mci);
483                 mci->mci_flags &= ~MCIF_INHEADER;
484                 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
485                         != NULL)
486                 {
487                         bt = mimeboundary(buf, boundaries);
488                         if (bt != MBT_NOTSEP)
489                                 break;
490                         putline(buf, mci);
491                 }
492                 if (sm_io_eof(e->e_dfp))
493                         bt = MBT_FINAL;
494         }
495         else if (!MapNLtoCRLF ||
496                  (sectionsize / 8 < sectionhighbits && !use_qp))
497         {
498                 /* use base64 encoding */
499                 int c1, c2;
500
501                 if (tTd(43, 36))
502                         sm_dprintf("  ...Content-Transfer-Encoding: base64\n");
503                 putline("Content-Transfer-Encoding: base64", mci);
504                 (void) sm_snprintf(buf, sizeof buf,
505                         "X-MIME-Autoconverted: from 8bit to base64 by %s id %s",
506                         MyHostName, e->e_id);
507                 putline(buf, mci);
508                 putline("", mci);
509                 mci->mci_flags &= ~MCIF_INHEADER;
510                 while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) !=
511                         SM_IO_EOF)
512                 {
513                         if (linelen > 71)
514                         {
515                                 *bp = '\0';
516                                 putline(buf, mci);
517                                 linelen = 0;
518                                 bp = buf;
519                         }
520                         linelen += 4;
521                         *bp++ = Base64Code[(c1 >> 2)];
522                         c1 = (c1 & 0x03) << 4;
523                         c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
524                         if (c2 == SM_IO_EOF)
525                         {
526                                 *bp++ = Base64Code[c1];
527                                 *bp++ = '=';
528                                 *bp++ = '=';
529                                 break;
530                         }
531                         c1 |= (c2 >> 4) & 0x0f;
532                         *bp++ = Base64Code[c1];
533                         c1 = (c2 & 0x0f) << 2;
534                         c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
535                         if (c2 == SM_IO_EOF)
536                         {
537                                 *bp++ = Base64Code[c1];
538                                 *bp++ = '=';
539                                 break;
540                         }
541                         c1 |= (c2 >> 6) & 0x03;
542                         *bp++ = Base64Code[c1];
543                         *bp++ = Base64Code[c2 & 0x3f];
544                 }
545                 *bp = '\0';
546                 putline(buf, mci);
547         }
548         else
549         {
550                 /* use quoted-printable encoding */
551                 int c1, c2;
552                 int fromstate;
553                 BITMAP256 badchars;
554
555                 /* set up map of characters that must be mapped */
556                 clrbitmap(badchars);
557                 for (c1 = 0x00; c1 < 0x20; c1++)
558                         setbitn(c1, badchars);
559                 clrbitn('\t', badchars);
560                 for (c1 = 0x7f; c1 < 0x100; c1++)
561                         setbitn(c1, badchars);
562                 setbitn('=', badchars);
563                 if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags))
564                         for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++)
565                                 setbitn(*p, badchars);
566
567                 if (tTd(43, 36))
568                         sm_dprintf("  ...Content-Transfer-Encoding: quoted-printable\n");
569                 putline("Content-Transfer-Encoding: quoted-printable", mci);
570                 (void) sm_snprintf(buf, sizeof buf,
571                         "X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s",
572                         MyHostName, e->e_id);
573                 putline(buf, mci);
574                 putline("", mci);
575                 mci->mci_flags &= ~MCIF_INHEADER;
576                 fromstate = 0;
577                 c2 = '\n';
578                 while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) !=
579                         SM_IO_EOF)
580                 {
581                         if (c1 == '\n')
582                         {
583                                 if (c2 == ' ' || c2 == '\t')
584                                 {
585                                         *bp++ = '=';
586                                         *bp++ = Base16Code[(c2 >> 4) & 0x0f];
587                                         *bp++ = Base16Code[c2 & 0x0f];
588                                 }
589                                 if (buf[0] == '.' && bp == &buf[1])
590                                 {
591                                         buf[0] = '=';
592                                         *bp++ = Base16Code[('.' >> 4) & 0x0f];
593                                         *bp++ = Base16Code['.' & 0x0f];
594                                 }
595                                 *bp = '\0';
596                                 putline(buf, mci);
597                                 linelen = fromstate = 0;
598                                 bp = buf;
599                                 c2 = c1;
600                                 continue;
601                         }
602                         if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
603                             bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
604                         {
605                                 *bp++ = '=';
606                                 *bp++ = '2';
607                                 *bp++ = '0';
608                                 linelen += 3;
609                         }
610                         else if (c2 == ' ' || c2 == '\t')
611                         {
612                                 *bp++ = c2;
613                                 linelen++;
614                         }
615                         if (linelen > 72 &&
616                             (linelen > 75 || c1 != '.' ||
617                              (linelen > 73 && c2 == '.')))
618                         {
619                                 if (linelen > 73 && c2 == '.')
620                                         bp--;
621                                 else
622                                         c2 = '\n';
623                                 *bp++ = '=';
624                                 *bp = '\0';
625                                 putline(buf, mci);
626                                 linelen = fromstate = 0;
627                                 bp = buf;
628                                 if (c2 == '.')
629                                 {
630                                         *bp++ = '.';
631                                         linelen++;
632                                 }
633                         }
634                         if (bitnset(bitidx(c1), badchars))
635                         {
636                                 *bp++ = '=';
637                                 *bp++ = Base16Code[(c1 >> 4) & 0x0f];
638                                 *bp++ = Base16Code[c1 & 0x0f];
639                                 linelen += 3;
640                         }
641                         else if (c1 != ' ' && c1 != '\t')
642                         {
643                                 if (linelen < 4 && c1 == "From"[linelen])
644                                         fromstate++;
645                                 *bp++ = c1;
646                                 linelen++;
647                         }
648                         c2 = c1;
649                 }
650
651                 /* output any saved character */
652                 if (c2 == ' ' || c2 == '\t')
653                 {
654                         *bp++ = '=';
655                         *bp++ = Base16Code[(c2 >> 4) & 0x0f];
656                         *bp++ = Base16Code[c2 & 0x0f];
657                         linelen += 3;
658                 }
659
660                 if (linelen > 0 || boundaries[0] != NULL)
661                 {
662                         *bp = '\0';
663                         putline(buf, mci);
664                 }
665
666         }
667         if (tTd(43, 3))
668                 sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
669         return bt;
670 }
671 /*
672 **  MIME_GETCHAR -- get a character for MIME processing
673 **
674 **      Treats boundaries as SM_IO_EOF.
675 **
676 **      Parameters:
677 **              fp -- the input file.
678 **              boundaries -- the current MIME boundaries.
679 **              btp -- if the return value is SM_IO_EOF, *btp is set to
680 **                      the type of the boundary.
681 **
682 **      Returns:
683 **              The next character in the input stream.
684 */
685
686 static int
687 mime_getchar(fp, boundaries, btp)
688         register SM_FILE_T *fp;
689         char **boundaries;
690         int *btp;
691 {
692         int c;
693         static unsigned char *bp = NULL;
694         static int buflen = 0;
695         static bool atbol = true;       /* at beginning of line */
696         static int bt = MBT_SYNTAX;     /* boundary type of next SM_IO_EOF */
697         static unsigned char buf[128];  /* need not be a full line */
698         int start = 0;                  /* indicates position of - in buffer */
699
700         if (buflen == 1 && *bp == '\n')
701         {
702                 /* last \n in buffer may be part of next MIME boundary */
703                 c = *bp;
704         }
705         else if (buflen > 0)
706         {
707                 buflen--;
708                 return *bp++;
709         }
710         else
711                 c = sm_io_getc(fp, SM_TIME_DEFAULT);
712         bp = buf;
713         buflen = 0;
714         if (c == '\n')
715         {
716                 /* might be part of a MIME boundary */
717                 *bp++ = c;
718                 atbol = true;
719                 c = sm_io_getc(fp, SM_TIME_DEFAULT);
720                 if (c == '\n')
721                 {
722                         (void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c);
723                         return c;
724                 }
725                 start = 1;
726         }
727         if (c != SM_IO_EOF)
728                 *bp++ = c;
729         else
730                 bt = MBT_FINAL;
731         if (atbol && c == '-')
732         {
733                 /* check for a message boundary */
734                 c = sm_io_getc(fp, SM_TIME_DEFAULT);
735                 if (c != '-')
736                 {
737                         if (c != SM_IO_EOF)
738                                 *bp++ = c;
739                         else
740                                 bt = MBT_FINAL;
741                         buflen = bp - buf - 1;
742                         bp = buf;
743                         return *bp++;
744                 }
745
746                 /* got "--", now check for rest of separator */
747                 *bp++ = '-';
748                 while (bp < &buf[sizeof buf - 2] &&
749                        (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF &&
750                        c != '\n')
751                 {
752                         *bp++ = c;
753                 }
754                 *bp = '\0';     /* XXX simply cut off? */
755                 bt = mimeboundary((char *) &buf[start], boundaries);
756                 switch (bt)
757                 {
758                   case MBT_FINAL:
759                   case MBT_INTERMED:
760                         /* we have a message boundary */
761                         buflen = 0;
762                         *btp = bt;
763                         return SM_IO_EOF;
764                 }
765
766                 atbol = c == '\n';
767                 if (c != SM_IO_EOF)
768                         *bp++ = c;
769         }
770
771         buflen = bp - buf - 1;
772         if (buflen < 0)
773         {
774                 *btp = bt;
775                 return SM_IO_EOF;
776         }
777         bp = buf;
778         return *bp++;
779 }
780 /*
781 **  MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
782 **
783 **      Parameters:
784 **              fp -- the input file.
785 **              boundaries -- the current MIME boundaries.
786 **              btp -- if the return value is SM_IO_EOF, *btp is set to
787 **                      the type of the boundary.
788 **
789 **      Returns:
790 **              The next character in the input stream.
791 */
792
793 static int
794 mime_getchar_crlf(fp, boundaries, btp)
795         register SM_FILE_T *fp;
796         char **boundaries;
797         int *btp;
798 {
799         static bool sendlf = false;
800         int c;
801
802         if (sendlf)
803         {
804                 sendlf = false;
805                 return '\n';
806         }
807         c = mime_getchar(fp, boundaries, btp);
808         if (c == '\n' && MapNLtoCRLF)
809         {
810                 sendlf = true;
811                 return '\r';
812         }
813         return c;
814 }
815 /*
816 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
817 **
818 **      Parameters:
819 **              line -- the input line.
820 **              boundaries -- the set of currently pending boundaries.
821 **
822 **      Returns:
823 **              MBT_NOTSEP -- if this is not a separator line
824 **              MBT_INTERMED -- if this is an intermediate separator
825 **              MBT_FINAL -- if this is a final boundary
826 **              MBT_SYNTAX -- if this is a boundary for the wrong
827 **                      enclosure -- i.e., a syntax error.
828 */
829
830 static int
831 mimeboundary(line, boundaries)
832         register char *line;
833         char **boundaries;
834 {
835         int type = MBT_NOTSEP;
836         int i;
837         int savec;
838
839         if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
840                 return MBT_NOTSEP;
841         i = strlen(line);
842         if (i > 0 && line[i - 1] == '\n')
843                 i--;
844
845         /* strip off trailing whitespace */
846         while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t'))
847                 i--;
848         savec = line[i];
849         line[i] = '\0';
850
851         if (tTd(43, 5))
852                 sm_dprintf("mimeboundary: line=\"%s\"... ", line);
853
854         /* check for this as an intermediate boundary */
855         if (isboundary(&line[2], boundaries) >= 0)
856                 type = MBT_INTERMED;
857         else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
858         {
859                 /* check for a final boundary */
860                 line[i - 2] = '\0';
861                 if (isboundary(&line[2], boundaries) >= 0)
862                         type = MBT_FINAL;
863                 line[i - 2] = '-';
864         }
865
866         line[i] = savec;
867         if (tTd(43, 5))
868                 sm_dprintf("%s\n", MimeBoundaryNames[type]);
869         return type;
870 }
871 /*
872 **  DEFCHARSET -- return default character set for message
873 **
874 **      The first choice for character set is for the mailer
875 **      corresponding to the envelope sender.  If neither that
876 **      nor the global configuration file has a default character
877 **      set defined, return "unknown-8bit" as recommended by
878 **      RFC 1428 section 3.
879 **
880 **      Parameters:
881 **              e -- the envelope for this message.
882 **
883 **      Returns:
884 **              The default character set for that mailer.
885 */
886
887 char *
888 defcharset(e)
889         register ENVELOPE *e;
890 {
891         if (e != NULL && e->e_from.q_mailer != NULL &&
892             e->e_from.q_mailer->m_defcharset != NULL)
893                 return e->e_from.q_mailer->m_defcharset;
894         if (DefaultCharSet != NULL)
895                 return DefaultCharSet;
896         return "unknown-8bit";
897 }
898 /*
899 **  ISBOUNDARY -- is a given string a currently valid boundary?
900 **
901 **      Parameters:
902 **              line -- the current input line.
903 **              boundaries -- the list of valid boundaries.
904 **
905 **      Returns:
906 **              The index number in boundaries if the line is found.
907 **              -1 -- otherwise.
908 **
909 */
910
911 static int
912 isboundary(line, boundaries)
913         char *line;
914         char **boundaries;
915 {
916         register int i;
917
918         for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++)
919         {
920                 if (strcmp(line, boundaries[i]) == 0)
921                         return i;
922         }
923         return -1;
924 }
925 #endif /* MIME8TO7 */
926
927 #if MIME7TO8
928 static int      mime_fromqp __P((unsigned char *, unsigned char **, int));
929
930 /*
931 **  MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format
932 **
933 **  This is a hack. Supports translating the two 7-bit body-encodings
934 **  (quoted-printable and base64) to 8-bit coded bodies.
935 **
936 **  There is not much point in supporting multipart here, as the UA
937 **  will be able to deal with encoded MIME bodies if it can parse MIME
938 **  multipart messages.
939 **
940 **  Note also that we won't be called unless it is a text/plain MIME
941 **  message, encoded base64 or QP and mailer flag '9' has been defined
942 **  on mailer.
943 **
944 **  Contributed by Marius Olaffson <marius@rhi.hi.is>.
945 **
946 **      Parameters:
947 **              mci -- mailer connection information.
948 **              header -- the header for this body part.
949 **              e -- envelope.
950 **
951 **      Returns:
952 **              none.
953 */
954
/*
**  Reverse base64 alphabet: indexed by a 7-bit character code, each
**  entry is the 6-bit value of that character, or -1 if the character
**  is not part of the base64 alphabet.
**
**  The element type is explicitly "signed char": whether plain "char"
**  is signed is implementation-defined, and with an unsigned char the
**  -1 entries would read back as 255 instead of -1.
*/

static const signed char index_64[128] =
{
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
        52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
        -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
        15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
        -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
        41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/*
**  CHAR64 -- base64 value of character c (an int), or -1 if c is
**  outside 0..127 or not in the alphabet.  Note that c is evaluated
**  more than once.
*/

# define CHAR64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])
968
969 void
970 mime7to8(mci, header, e)
971         register MCI *mci;
972         HDR *header;
973         register ENVELOPE *e;
974 {
975         int pxflags;
976         register char *p;
977         char *cte;
978         char **pvp;
979         unsigned char *fbufp;
980         char buf[MAXLINE];
981         unsigned char fbuf[MAXLINE + 1];
982         char pvpbuf[MAXLINE];
983         extern unsigned char MimeTokenTab[256];
984
985         p = hvalue("Content-Transfer-Encoding", header);
986         if (p == NULL ||
987             (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
988                            MimeTokenTab)) == NULL ||
989             pvp[0] == NULL)
990         {
991                 /* "can't happen" -- upper level should have caught this */
992                 syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p);
993
994                 /* avoid bounce loops */
995                 e->e_flags |= EF_DONT_MIME;
996
997                 /* cheap failsafe algorithm -- should work on text/plain */
998                 if (p != NULL)
999                 {
1000                         (void) sm_snprintf(buf, sizeof buf,
1001                                 "Content-Transfer-Encoding: %s", p);
1002                         putline(buf, mci);
1003                 }
1004                 putline("", mci);
1005                 mci->mci_flags &= ~MCIF_INHEADER;
1006                 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
1007                         != NULL)
1008                         putline(buf, mci);
1009                 return;
1010         }
1011         cataddr(pvp, NULL, buf, sizeof buf, '\0');
1012         cte = sm_rpool_strdup_x(e->e_rpool, buf);
1013
1014         mci->mci_flags |= MCIF_INHEADER;
1015         putline("Content-Transfer-Encoding: 8bit", mci);
1016         (void) sm_snprintf(buf, sizeof buf,
1017                 "X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
1018                 cte, MyHostName, e->e_id);
1019         putline(buf, mci);
1020         putline("", mci);
1021         mci->mci_flags &= ~MCIF_INHEADER;
1022
1023         /*
1024         **  Translate body encoding to 8-bit.  Supports two types of
1025         **  encodings; "base64" and "quoted-printable". Assume qp if
1026         **  it is not base64.
1027         */
1028
1029         pxflags = PXLF_MAPFROM;
1030         if (sm_strcasecmp(cte, "base64") == 0)
1031         {
1032                 int c1, c2, c3, c4;
1033
1034                 fbufp = fbuf;
1035                 while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) !=
1036                         SM_IO_EOF)
1037                 {
1038                         if (isascii(c1) && isspace(c1))
1039                                 continue;
1040
1041                         do
1042                         {
1043                                 c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1044                         } while (isascii(c2) && isspace(c2));
1045                         if (c2 == SM_IO_EOF)
1046                                 break;
1047
1048                         do
1049                         {
1050                                 c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1051                         } while (isascii(c3) && isspace(c3));
1052                         if (c3 == SM_IO_EOF)
1053                                 break;
1054
1055                         do
1056                         {
1057                                 c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1058                         } while (isascii(c4) && isspace(c4));
1059                         if (c4 == SM_IO_EOF)
1060                                 break;
1061
1062                         if (c1 == '=' || c2 == '=')
1063                                 continue;
1064                         c1 = CHAR64(c1);
1065                         c2 = CHAR64(c2);
1066
1067                         *fbufp = (c1 << 2) | ((c2 & 0x30) >> 4);
1068                         if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1069                         {
1070                                 if (*--fbufp != '\n' ||
1071                                     (fbufp > fbuf && *--fbufp != '\r'))
1072                                 {
1073                                         pxflags |= PXLF_NOADDEOL;
1074                                         fbufp++;
1075                                 }
1076                                 putxline((char *) fbuf, fbufp - fbuf,
1077                                          mci, pxflags);
1078                                 pxflags &= ~PXLF_NOADDEOL;
1079                                 fbufp = fbuf;
1080                         }
1081                         if (c3 == '=')
1082                                 continue;
1083                         c3 = CHAR64(c3);
1084                         *fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
1085                         if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1086                         {
1087                                 if (*--fbufp != '\n' ||
1088                                     (fbufp > fbuf && *--fbufp != '\r'))
1089                                 {
1090                                         pxflags |= PXLF_NOADDEOL;
1091                                         fbufp++;
1092                                 }
1093                                 putxline((char *) fbuf, fbufp - fbuf,
1094                                          mci, pxflags);
1095                                 pxflags &= ~PXLF_NOADDEOL;
1096                                 fbufp = fbuf;
1097                         }
1098                         if (c4 == '=')
1099                                 continue;
1100                         c4 = CHAR64(c4);
1101                         *fbufp = ((c3 & 0x03) << 6) | c4;
1102                         if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1103                         {
1104                                 if (*--fbufp != '\n' ||
1105                                     (fbufp > fbuf && *--fbufp != '\r'))
1106                                 {
1107                                         pxflags |= PXLF_NOADDEOL;
1108                                         fbufp++;
1109                                 }
1110                                 putxline((char *) fbuf, fbufp - fbuf,
1111                                          mci, pxflags);
1112                                 pxflags &= ~PXLF_NOADDEOL;
1113                                 fbufp = fbuf;
1114                         }
1115                 }
1116         }
1117         else
1118         {
1119                 int off;
1120
1121                 /* quoted-printable */
1122                 pxflags |= PXLF_NOADDEOL;
1123                 fbufp = fbuf;
1124                 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
1125                                    sizeof buf) != NULL)
1126                 {
1127                         off = mime_fromqp((unsigned char *) buf, &fbufp,
1128                                           &fbuf[MAXLINE] - fbufp);
1129 again:
1130                         if (off < -1)
1131                                 continue;
1132
1133                         if (fbufp - fbuf > 0)
1134                                 putxline((char *) fbuf, fbufp - fbuf - 1, mci,
1135                                          pxflags);
1136                         fbufp = fbuf;
1137                         if (off >= 0 && buf[off] != '\0')
1138                         {
1139                                 off = mime_fromqp((unsigned char *) (buf + off),
1140                                                   &fbufp,
1141                                                   &fbuf[MAXLINE] - fbufp);
1142                                 goto again;
1143                         }
1144                 }
1145         }
1146
1147         /* force out partial last line */
1148         if (fbufp > fbuf)
1149         {
1150                 *fbufp = '\0';
1151                 putxline((char *) fbuf, fbufp - fbuf, mci, pxflags);
1152         }
1153
1154         /*
1155         **  The decoded text may end without an EOL.  Since this function
1156         **  is only called for text/plain MIME messages, it is safe to
1157         **  add an extra one at the end just in case.  This is a hack,
1158         **  but so is auto-converting MIME in the first place.
1159         */
1160
1161         putline("", mci);
1162
1163         if (tTd(43, 3))
1164                 sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte);
1165 }
1166 /*
1167 **  The following is based on Borenstein's "codes.c" module, with simplifying
1168 **  changes as we do not deal with multipart, and to do the translation in-core,
1169 **  with an attempt to prevent overrun of output buffers.
1170 **
1171 **  What is needed here are changes to defend this code better against
1172 **  bad encodings. Questionable to always return 0xFF for bad mappings.
1173 */
1174
/*
**  Hex digit values: indexed by a 7-bit character code, each entry is
**  the value (0..15) of that character as a hex digit, upper or lower
**  case, or -1 if the character is not a hex digit.
**
**  The element type is explicitly "signed char": whether plain "char"
**  is signed is implementation-defined, and with an unsigned char the
**  -1 entries would read back as 255, so the "== -1" tests in
**  mime_fromqp would never detect a bad digit.
*/

static const signed char index_hex[128] =
{
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
        -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/*
**  HEXCHAR -- value of hex digit c (an int), or -1 if c is outside
**  0..127 or not a hex digit.  Note that c is evaluated more than once.
*/

# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])

/*
**  MIME_FROMQP -- decode quoted printable string
**
**      Decoding stops at the end of the input string, after a newline
**      has been copied, or when the output buffer is full.  Characters
**      that are not hex digits between the two digits of an escape
**      are skipped.  An escape cut short by the end of the input is
**      dropped.
**
**      Parameters:
**              infile -- input (encoded) string
**              outfile -- output string; *outfile is advanced past
**                      everything that has been stored
**              maxlen -- size of output buffer
**
**      Returns:
**              -2 if '=' is followed by a newline (soft line break)
**                      or by something that is not a hex digit; the
**                      rest of the input is ignored and the output is
**                      NOT '\0' terminated
**              -1 if infile completely decoded into outfile
**                      (through the newline or to the end of the
**                      string); the output is '\0' terminated
**              >= 0 is the position in infile decoding
**                      reached before maxlen was reached; the output
**                      is '\0' terminated, except that 0 is also
**                      returned, with nothing stored, if maxlen has
**                      no room for one character plus the '\0'
*/

static int
mime_fromqp(infile, outfile, maxlen)
        unsigned char *infile;
        unsigned char **outfile;
        int maxlen;             /* Max # of chars allowed in outfile */
{
        int c1, c2;
        int nchar = 0;
        unsigned char *b;

        /* decrement by one for trailing '\0', at least one other char */
        if (--maxlen < 1)
                return 0;

        b = infile;
        while ((c1 = *infile++) != '\0' && nchar < maxlen)
        {
                if (c1 == '=')
                {
                        if ((c1 = *infile++) == '\0')
                                break;

                        if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1)
                        {
                                /* ignore it and the rest of the buffer */
                                return -2;
                        }
                        else
                        {
                                /* look for the second hex digit */
                                do
                                {
                                        if ((c2 = *infile++) == '\0')
                                        {
                                                c2 = -1;
                                                break;
                                        }
                                } while ((c2 = HEXCHAR(c2)) == -1);

                                if (c2 == -1)
                                        break;
                                nchar++;
                                *(*outfile)++ = c1 << 4 | c2;
                        }
                }
                else
                {
                        nchar++;
                        *(*outfile)++ = c1;
                        if (c1 == '\n')
                        {
                                /*
                                **  End of line.  The newline has been
                                **  consumed, so this is "completely
                                **  decoded" even if it used up the
                                **  last free byte; reporting a resume
                                **  position here would make the caller
                                **  decode the newline a second time.
                                */

                                *(*outfile)++ = '\0';
                                return -1;
                        }
                }
        }
        *(*outfile)++ = '\0';

        /* out of room: infile is one past the first undecoded character */
        if (nchar >= maxlen)
                return (int) (infile - b - 1);
        return -1;
}
1261 #endif /* MIME7TO8 */