bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2013-2018 Dovecot authors, see the included COPYING file */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "lib.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "net.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "str.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "strescape.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "http-url.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "http-parser.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
/*
 Character definitions:

 tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
                / DIGIT / ALPHA
                ; any VCHAR, except special
 special        = "(" / ")" / "<" / ">" / "@" / ","
                / ";" / ":" / "\" / DQUOTE / "/" / "["
                / "]" / "?" / "=" / "{" / "}"
 qdtext         = OWS / %x21 / %x23-5B / %x5D-7E / obs-text
 qdtext-nf      = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
 ctext          = OWS / %x21-27 / %x2A-5B / %x5D-7E / obs-text
 obs-text       = %x80-FF
 OWS            = *( SP / HTAB )
 VCHAR          = %x21-7E
 't68char'      = ALPHA / DIGIT / "-" / "." / "_" / "~" / "+" / "/"

 'text'         = ( HTAB / SP / VCHAR / obs-text )

 Character bit mappings:

 (1<<0) => ALPHA / DIGIT / "-" / "." / "_" / "~" / "+"
 (1<<1) => "!" / "#" / "$" / "%" / "&" / "'" / "*" / "^" / "`" / "|"
 (1<<2) => special
 (1<<3) => %x21 / %x2A-5B / %x5D-7E
 (1<<4) => %x23-29
 (1<<5) => %x22-27
 (1<<6) => HTAB / SP / obs-text
 (1<<7) => "/"
 */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
/* One bit per character class recorded in _http_char_lookup[]. A character
   may carry several bits; the masks below select the bits that together
   make up each grammar production. */
enum {
	/* ALPHA / DIGIT / "-" / "." / "_" / "~" / "+" */
	HTTP_CHAR_BIT_TCHAR_T68    = (1<<0),
	/* "!" / "#" / "$" / "%" / "&" / "'" / "*" / "^" / "`" / "|" */
	HTTP_CHAR_BIT_TCHAR_OTHER  = (1<<1),
	/* special */
	HTTP_CHAR_BIT_SPECIAL      = (1<<2),
	/* %x21 / %x2A-5B / %x5D-7E */
	HTTP_CHAR_BIT_QCTEXT       = (1<<3),
	/* %x23-29 */
	HTTP_CHAR_BIT_QDTEXT_EXTRA = (1<<4),
	/* %x22-27 */
	HTTP_CHAR_BIT_CTEXT_EXTRA  = (1<<5),
	/* HTAB / SP / obs-text */
	HTTP_CHAR_BIT_WSP_OBS      = (1<<6),
	/* "/" */
	HTTP_CHAR_BIT_SLASH        = (1<<7)
};

/* tchar */
const unsigned char _http_token_char_mask =
	HTTP_CHAR_BIT_TCHAR_T68 | HTTP_CHAR_BIT_TCHAR_OTHER;
/* VCHAR: tchar plus special */
const unsigned char _http_value_char_mask =
	HTTP_CHAR_BIT_TCHAR_T68 | HTTP_CHAR_BIT_TCHAR_OTHER |
	HTTP_CHAR_BIT_SPECIAL;
/* 'text': VCHAR plus HTAB / SP / obs-text */
const unsigned char _http_text_char_mask =
	HTTP_CHAR_BIT_TCHAR_T68 | HTTP_CHAR_BIT_TCHAR_OTHER |
	HTTP_CHAR_BIT_SPECIAL | HTTP_CHAR_BIT_WSP_OBS;
/* qdtext */
const unsigned char _http_qdtext_char_mask =
	HTTP_CHAR_BIT_QCTEXT | HTTP_CHAR_BIT_QDTEXT_EXTRA |
	HTTP_CHAR_BIT_WSP_OBS;
/* ctext */
const unsigned char _http_ctext_char_mask =
	HTTP_CHAR_BIT_QCTEXT | HTTP_CHAR_BIT_CTEXT_EXTRA |
	HTTP_CHAR_BIT_WSP_OBS;
/* 't68char' */
const unsigned char _http_token68_char_mask =
	HTTP_CHAR_BIT_TCHAR_T68 | HTTP_CHAR_BIT_SLASH;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
/* Character class table, indexed by byte value. Each entry is the OR of the
   class bits (see "Character bit mappings" above) that apply to that byte;
   a zero entry means the byte belongs to none of the classes. */
const unsigned char _http_char_lookup[256] = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 00
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 10
	0x40, 0x0A, 0x24, 0x32, 0x32, 0x32, 0x32, 0x32, 0x14, 0x14, 0x0A, 0x09, 0x0C, 0x09, 0x09, 0x8C, // 20
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, // 30
	0x0C, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 40
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0C, 0x04, 0x0C, 0x0A, 0x09, // 50
	0x0A, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 60
	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0C, 0x0A, 0x0C, 0x09, 0x00, // 70

	/* obs-text (%x80-FF) */
	0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 80
	0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 90
	0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // A0
	0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // B0
	0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // C0
	0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // D0
	0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // E0
	0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // F0
};
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch/*
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch * HTTP value parsing
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschvoid http_parser_init(struct http_parser *parser,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch const unsigned char *data, size_t size)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
efe78d3ba24fc866af1c79b9223dc0809ba26cadStephan Bosch i_zero(parser);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch parser->begin = data;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch parser->cur = data;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch parser->end = data + size;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschvoid http_parse_ows(struct http_parser *parser)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* OWS = *( SP / HTAB ) */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (parser->cur >= parser->end)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch while (parser->cur < parser->end &&
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch (parser->cur[0] == ' ' || parser->cur[0] == '\t')) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch parser->cur++;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
0b5bdb5ba6c8cb928b6d2dcc4636148bfa1a7ec7Stephan Boschint http_parser_skip_token(struct http_parser *parser)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* token = 1*tchar */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (parser->cur >= parser->end || !http_char_is_token(*parser->cur))
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return 0;
0b5bdb5ba6c8cb928b6d2dcc4636148bfa1a7ec7Stephan Bosch parser->cur++;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch while (parser->cur < parser->end && http_char_is_token(*parser->cur))
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch parser->cur++;
0b5bdb5ba6c8cb928b6d2dcc4636148bfa1a7ec7Stephan Bosch return 1;
0b5bdb5ba6c8cb928b6d2dcc4636148bfa1a7ec7Stephan Bosch}
0b5bdb5ba6c8cb928b6d2dcc4636148bfa1a7ec7Stephan Bosch
0b5bdb5ba6c8cb928b6d2dcc4636148bfa1a7ec7Stephan Boschint http_parse_token(struct http_parser *parser, const char **token_r)
0b5bdb5ba6c8cb928b6d2dcc4636148bfa1a7ec7Stephan Bosch{
0b5bdb5ba6c8cb928b6d2dcc4636148bfa1a7ec7Stephan Bosch const unsigned char *first = parser->cur;
0b5bdb5ba6c8cb928b6d2dcc4636148bfa1a7ec7Stephan Bosch int ret;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
0b5bdb5ba6c8cb928b6d2dcc4636148bfa1a7ec7Stephan Bosch if ((ret=http_parser_skip_token(parser)) <= 0)
0b5bdb5ba6c8cb928b6d2dcc4636148bfa1a7ec7Stephan Bosch return ret;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch *token_r = t_strndup(first, parser->cur - first);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return 1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschint http_parse_token_list_next(struct http_parser *parser,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch const char **token_r)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-21;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch Appendix B:
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch For compatibility with legacy list rules, recipients SHOULD accept
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch empty list elements. In other words, consumers would follow the list
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch productions:
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch #element => [ ( "," / element ) *( OWS "," [ OWS element ] ) ]
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch 1#element => *( "," OWS ) element *( OWS "," [ OWS element ] )
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch for (;;) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (http_parse_token(parser, token_r) > 0)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch break;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch http_parse_ows(parser);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (parser->cur >= parser->end || parser->cur[0] != ',')
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return 0;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch parser->cur++;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch http_parse_ows(parser);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return 1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Boschint http_parse_quoted_string(struct http_parser *parser, const char **str_r)
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch{
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch string_t *str;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch /* quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'['
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch / %x5D-7E ; ']'-'~'
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch / obs-text
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch obs-text = %x80-FF
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch */
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch /* DQUOTE */
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch if (parser->cur >= parser->end || parser->cur[0] != '"')
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch return 0;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch parser->cur++;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch /* *( qdtext / quoted-pair ) */
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch str = t_str_new(256);
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch for (;;) {
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch const unsigned char *first;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch /* *qdtext */
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch first = parser->cur;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch while (parser->cur < parser->end && http_char_is_qdtext(*parser->cur))
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch parser->cur++;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch if (parser->cur >= parser->end)
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch return -1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch str_append_n(str, first, parser->cur - first);
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch /* DQUOTE */
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch if (*parser->cur == '"') {
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch parser->cur++;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch break;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch /* "\" */
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch } else if (*parser->cur == '\\') {
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch parser->cur++;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch if (parser->cur >= parser->end || !http_char_is_text(*parser->cur))
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch return -1;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch str_append_c(str, *parser->cur);
aefccddf68a92e43209aea013a36d0d7d374f410Stephan Bosch parser->cur++;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch /* ERROR */
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch } else {
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch return -1;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch }
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch }
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch *str_r = str_c(str);
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch return 1;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch}
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch
6ee9ce5ed955a1283dc22ad28980bf9cc23d4c4eStephan Boschint http_parse_token_or_qstring(struct http_parser *parser,
6ee9ce5ed955a1283dc22ad28980bf9cc23d4c4eStephan Bosch const char **word_r)
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch{
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch if (parser->cur >= parser->end)
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch return 0;
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch if (parser->cur[0] == '"')
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch return http_parse_quoted_string(parser, word_r);
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch return http_parse_token(parser, word_r);
233a96794faa7734dfc3aea19503c14b29cccfb7Stephan Bosch}