/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen#include "lib.h"
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen#include "buffer.h"
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen#include "message-parser.h"
ed9f9fe03e60d37be0a58e3ee281d1b6b7d2693fTimo Sirainen#include "mail-html2text.h"
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen#include "fts-parser.h"
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainenstruct html_fts_parser {
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen struct fts_parser parser;
ed9f9fe03e60d37be0a58e3ee281d1b6b7d2693fTimo Sirainen struct mail_html2text *html2text;
ed9f9fe03e60d37be0a58e3ee281d1b6b7d2693fTimo Sirainen buffer_t *output;
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen};
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainenstatic struct fts_parser *
43f4e550efe7afa72aab3a7ded7ad7ce606526fbSergey Kitovfts_parser_html_try_init(struct fts_parser_context *parser_context)
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen{
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen struct html_fts_parser *parser;
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen
43f4e550efe7afa72aab3a7ded7ad7ce606526fbSergey Kitov if (!mail_html2text_content_type_match(parser_context->content_type))
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen return NULL;
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen parser = i_new(struct html_fts_parser, 1);
ecc5a1a440799a0966c26da956f5d6e2d8073e03Timo Sirainen parser->parser.v = fts_parser_html;
ed9f9fe03e60d37be0a58e3ee281d1b6b7d2693fTimo Sirainen parser->html2text = mail_html2text_init(0);
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen parser->output = buffer_create_dynamic(default_pool, 4096);
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen return &parser->parser;
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen}
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainenstatic void fts_parser_html_more(struct fts_parser *_parser,
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen struct message_block *block)
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen{
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen struct html_fts_parser *parser = (struct html_fts_parser *)_parser;
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen
9456cdbebac7bdf7ee4fb2a2e41a9f439b7744ceTimo Sirainen if (block->size == 0) {
9456cdbebac7bdf7ee4fb2a2e41a9f439b7744ceTimo Sirainen /* finished */
9456cdbebac7bdf7ee4fb2a2e41a9f439b7744ceTimo Sirainen return;
9456cdbebac7bdf7ee4fb2a2e41a9f439b7744ceTimo Sirainen }
9456cdbebac7bdf7ee4fb2a2e41a9f439b7744ceTimo Sirainen
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen buffer_set_used_size(parser->output, 0);
ed9f9fe03e60d37be0a58e3ee281d1b6b7d2693fTimo Sirainen mail_html2text_more(parser->html2text, block->data, block->size,
ed9f9fe03e60d37be0a58e3ee281d1b6b7d2693fTimo Sirainen parser->output);
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen block->data = parser->output->data;
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen block->size = parser->output->used;
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen}
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen
07bfc948f87ca139b18af57fb2615a7fd4f668b3Timo Sirainenstatic int fts_parser_html_deinit(struct fts_parser *_parser,
07bfc948f87ca139b18af57fb2615a7fd4f668b3Timo Sirainen const char **retriable_err_msg_r ATTR_UNUSED)
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen{
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen struct html_fts_parser *parser = (struct html_fts_parser *)_parser;
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen
432dd1f5d7deab3725265cd7e98bb6d74650b635Timo Sirainen mail_html2text_deinit(&parser->html2text);
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen buffer_free(&parser->output);
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen i_free(parser);
07bfc948f87ca139b18af57fb2615a7fd4f668b3Timo Sirainen return 1;
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen}
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen
ecc5a1a440799a0966c26da956f5d6e2d8073e03Timo Sirainenstruct fts_parser_vfuncs fts_parser_html = {
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen fts_parser_html_try_init,
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen fts_parser_html_more,
3ad57148af55e45cc45401162d3460ed0b237a10Timo Sirainen fts_parser_html_deinit,
3ad57148af55e45cc45401162d3460ed0b237a10Timo Sirainen NULL
772120713c176bde8c932b3fb4c413d223741fcdTimo Sirainen};