mime.c revision 1
1N/A * Copyright (c) 1998-2003, 2006 Sendmail, Inc. and its suppliers. 1N/A * All rights reserved. 1N/A * Copyright (c) 1994, 1996-1997 Eric P. Allman. All rights reserved. 1N/A * Copyright (c) 1994 1N/A * The Regents of the University of California. All rights reserved. 1N/A * By using this file, you agree to the terms and conditions set 1N/A * forth in the LICENSE file which can be found at the top level of 1N/A * the sendmail distribution. 1N/A#
pragma ident "%Z%%M% %I% %E% SMI" 1N/A** I am indebted to John Beck of Hewlett-Packard, who contributed 1N/A** his code to me for inclusion. As it turns out, I did not use 1N/A** his code since he used a "minimum change" approach that used 1N/A** several temp files, and I wanted a "minimum impact" approach 1N/A** that would avoid copying. However, looking over his code 1N/A** helped me cement my understanding of the problem. 1N/A** I also looked at, but did not directly use, Nathaniel 1N/A** Borenstein's "code.c" module. Again, it functioned as 1N/A** a file-to-file translator, which did not fit within my 1N/A** design bounds, but it was a useful base for understanding 1N/A/* use "old" mime 7 to 8 algorithm by default */ 1N/A#
endif /* ! MIME7TO8_OLD */ 1N/A/* character set for hex and base64 encoding */ 1N/Astatic char Base64Code[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1N/A/* types of MIME boundaries */ 1N/A#
define MBT_FINAL 3 /* final boundary (trailing -- included) */ 1N/A "SYNTAX",
"NOTSEP",
"INTERMED",
"FINAL" 1N/A** MIME8TO7 -- output 8 bit body in 7 bit format 1N/A** The header has already been output -- this has to do the 1N/A** 8 to 7 bit conversion. It would be easy if we didn't have 1N/A** We won't be called if we don't have to do a conversion, and 1N/A** appropriate MIME-Version: and Content-Type: fields have been 1N/A** output. Any Content-Transfer-Encoding: field has not been 1N/A** output, and we can add it here. 1N/A** mci -- mailer connection information. 1N/A** header -- the header for this body part. 1N/A** boundaries -- the currently pending message boundaries. 1N/A** NULL if we are processing the outer portion. 1N/A** flags -- to tweak processing. 1N/A** level -- recursion level. 1N/A** An indicator of what terminated the message part: 1N/A** MBT_FINAL -- the final boundary 1N/A** MBT_INTERMED -- an intermediate boundary 1N/A** MBT_NOTSEP -- an end of file 1N/A** SM_IO_EOF -- I/O error occurred 1N/A usrerr(
"mime8to7: recursion level %d exceeded",
1N/A /* break out parameters */ 1N/A /* skip to semicolon separator */ 1N/A /* complain about empty values */ 1N/A usrerr(
"mime8to7: Empty parameter in Content-Type header");
1N/A /* avoid bounce loops */ 1N/A /* extract field name */ 1N/A /* see if there is a value */ 1N/A /* check for disaster cases */ 1N/A /* don't propagate some flags more than one level into the message */ 1N/A ** Check for cases that can not be encoded. 1N/A ** For example, you can't encode certain kinds of types 1N/A ** or already-encoded messages. If we find this case, 1N/A ** just copy it through. 1N/A#
endif /* USE_B_CLASS */ 1N/A ** Multipart requires special processing. 1N/A ** Do a recursive descent into the message. 1N/A usrerr(
"mime8to7: Content-Type: \"%s\": %s boundary",
1N/A i >=
argc ?
"missing" :
"bogus", p);
1N/A /* avoid bounce loops */ 1N/A usrerr(
"mime8to7: multipart boundary \"%s\" too long",
1N/A /* avoid bounce loops */ 1N/A usrerr(
"mime8to7: multipart nesting boundary too deep");
1N/A /* avoid bounce loops */ 1N/A /* skip the early "comment" prologue */ 1N/A /* skip the late "comment" epilogue */ 1N/A ** Class 's' is predefined to have "rfc822" only. 1N/A ** Non-compound body type 1N/A ** Compute the ratio of seven to eight bit characters; 1N/A ** use that as a heuristic to decide how to do the 1N/A /* remember where we were */ 1N/A syserr(
"mime8to7: cannot sm_io_tell on %cf%s",
1N/A /* do a scan of this body type to count character types */ 1N/A for (p =
buf; *p !=
'\0'; p++)
1N/A /* count bytes with the high bit set */ 1N/A ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 1N/A ** assume base64. This heuristic avoids double-reading 1N/A ** large graphics or video files. 1N/A /* return to the original offset for processing */ 1N/A /* XXX use relative seeks to handle >31 bit file sizes? */ 1N/A syserr(
"mime8to7: cannot sm_io_fseek on %cf%s",
1N/A ** Heuristically determine encoding method. 1N/A ** If more than 1/8 of the total characters have the 1N/A ** eighth bit set, use base64; else use quoted-printable. 1N/A ** However, only encode binary encoded data as base64, 1N/A ** since otherwise the NL=>CRLF mapping will be a problem. 1N/A sm_dprintf(
"mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n",
1N/A /* no encoding necessary */ 1N/A ** Skip _unless_ in MIME mode and potentially 1N/A ** converting from 8 bit to 7 bit MIME. See 1N/A ** putheader() for the counterpart where the 1N/A ** CTE header is skipped in the opposite 1N/A "Content-Transfer-Encoding: %.200s",
cte);
1N/A /* use base64 encoding */ 1N/A "X-MIME-Autoconverted: from 8bit to base64 by %s id %s",
1N/A /* use quoted-printable encoding */ 1N/A /* set up map of characters that must be mapped */ 1N/A for (p =
"!\"#$@[\\]^`{|}~"; *p !=
'\0'; p++)
1N/A if (!
putline(
"Content-Transfer-Encoding: quoted-printable",
1N/A "X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s",
1N/A /* output any saved character */ 1N/A** MIME_GETCHAR -- get a character for MIME processing 1N/A** Treats boundaries as SM_IO_EOF. 1N/A** fp -- the input file. 1N/A** boundaries -- the current MIME boundaries. 1N/A** btp -- if the return value is SM_IO_EOF, *btp is set to 1N/A** the type of the boundary. 1N/A** The next character in the input stream. 1N/A static bool atbol =
true;
/* at beginning of line */ 1N/A static unsigned char buf[
128];
/* need not be a full line */ 1N/A int start = 0;
/* indicates position of - in buffer */ 1N/A /* last \n in buffer may be part of next MIME boundary */ 1N/A /* might be part of a MIME boundary */ 1N/A /* check for a message boundary */ 1N/A /* got "--", now check for rest of separator */ 1N/A *
bp =
'\0';
/* XXX simply cut off? */ 1N/A /* we have a message boundary */ 1N/A** MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF 1N/A** fp -- the input file. 1N/A** boundaries -- the current MIME boundaries. 1N/A** btp -- if the return value is SM_IO_EOF, *btp is set to 1N/A** the type of the boundary. 1N/A** The next character in the input stream. 1N/A** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 1N/A** line -- the input line. 1N/A** boundaries -- the set of currently pending boundaries. 1N/A** MBT_NOTSEP -- if this is not a separator line 1N/A** MBT_INTERMED -- if this is an intermediate separator 1N/A** MBT_FINAL -- if this is a final boundary 1N/A** MBT_SYNTAX -- if this is a boundary for the wrong 1N/A** enclosure -- i.e., a syntax error. 1N/A /* strip off trailing whitespace */ 1N/A while (i > 0 && (
line[i -
1] ==
' ' ||
line[i -
1] ==
'\t' 1N/A#
endif /* _FFR_MIME_CR_OK */ 1N/A /* check for this as an intermediate boundary */ 1N/A /* check for a final boundary */ 1N/A** DEFCHARSET -- return default character set for message 1N/A** The first choice for character set is for the mailer 1N/A** corresponding to the envelope sender. If neither that 1N/A** nor the global configuration file has a default character 1N/A** set defined, return "unknown-8bit" as recommended by 1N/A** RFC 1428 section 3. 1N/A** e -- the envelope for this message. 1N/A** The default character set for that mailer. 1N/A return "unknown-8bit";
1N/A** ISBOUNDARY -- is a given string a currently valid boundary? 1N/A** line -- the current input line. 1N/A** boundaries -- the list of valid boundaries. 1N/A** The index number in boundaries if the line is found. 1N/A#
endif /* MIME8TO7 */ 1N/A** MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format 1N/A** This is a hack. Supports translating the two 7-bit body-encodings 1N/A** (quoted-printable and base64) to 8-bit coded bodies. 1N/A** There is not much point in supporting multipart here, as the UA 1N/A** will be able to deal with encoded MIME bodies if it can parse MIME 1N/A** multipart messages. 1N/A** Note also that we won't be called unless it is a text/plain MIME 1N/A** message, encoded base64 or QP and mailer flag '9' has been defined 1N/A** Contributed by Marius Olaffson <marius@rhi.hi.is>. 1N/A** mci -- mailer connection information. 1N/A** header -- the header for this body part. 1N/A** true iff body was written successfully 1N/A -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1,
1N/A -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1,
1N/A -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,
62, -
1,-
1,-
1,
63,
1N/A 52,
53,
54,
55,
56,
57,
58,
59,
60,
61,-
1,-
1, -
1,-
1,-
1,-
1,
1N/A -
1, 0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
1N/A 15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,-
1, -
1,-
1,-
1,-
1,
1N/A -
1,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
1N/A 41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,-
1, -
1,-
1,-
1,-
1 1N/A /* "can't happen" -- upper level should have caught this */ 1N/A /* avoid bounce loops */ 1N/A "Content-Transfer-Encoding: %s", p);
1N/A "X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
1N/A ** Translate body encoding to 8-bit. Supports two types of 1N/A ** encodings; "base64" and "quoted-printable". Assume qp if 1N/A ** it is not base64. 1N/A#
else /* MIME7TO8_OLD */ 1N/A#
endif /* MIME7TO8_OLD */ 1N/A /* quoted-printable */ 1N/A /* force out partial last line */ 1N/A ** The decoded text may end without an EOL. Since this function 1N/A ** is only called for text/plain MIME messages, it is safe to 1N/A ** add an extra one at the end just in case. This is a hack, 1N/A ** but so is auto-converting MIME in the first place. 1N/A** The following is based on Borenstein's "codes.c" module, with simplifying 1N/A** changes as we do not deal with multipart, and to do the translation in-core, 1N/A** with an attempt to prevent overrun of output buffers. 1N/A** What is needed here are changes to defend this code better against 1N/A** bad encodings. Questionable to always return 0xFF for bad mappings. 1N/A -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1,
1N/A -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1,
1N/A -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1,
1N/A 0,
1,
2,
3,
4,
5,
6,
7,
8,
9,-
1,-
1, -
1,-
1,-
1,-
1,
1N/A -
1,
10,
11,
12,
13,
14,
15,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1,
1N/A -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1,
1N/A -
1,
10,
11,
12,
13,
14,
15,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1,
1N/A -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1, -
1,-
1,-
1,-
1 1N/A** MIME_FROMQP -- decode quoted printable string 1N/A** infile -- input (encoded) string 1N/A** outfile -- output string 1N/A** maxlen -- size of output buffer 1N/A** -2 if decoding failure 1N/A** -1 if infile completely decoded into outfile 1N/A** >= 0 is the position in infile decoding 1N/A** reached before maxlen was reached 1N/A int maxlen;
/* Max # of chars allowed in outfile */ 1N/A /* decrement by one for trailing '\0', at least one other char */ 1N/A /* ignore it and the rest of the buffer */ 1N/A#
endif /* MIME7TO8 */