/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen#include "lib.h"
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen#include "str.h"
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen#include "istream.h"
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen#include "mail-html2text.h"
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen#include "test-common.h"
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen
/* Input / expected-output pairs consumed by test_mail_html2text().
   Each input is converted with MAIL_HTML2TEXT_FLAG_SKIP_QUOTED set and the
   accumulated text must equal the output string exactly. */
static const struct {
	const char *input;
	const char *output;
} tests[] = {
	{ "&&aaaaaaaaaa", "" },

	/* named entities: the input has to stay entity-encoded in the source,
	   otherwise the raw '<' and '>' would be parsed as a tag instead of
	   being decoded into the literal characters expected below */
	{ "a&amp;&lt;&clubs;&gt;b",
	  "a&<\xE2\x99\xA3>b" },
	/* entities without the terminating ';' are expected to yield nothing */
	{ "&", "" },
	{ "&amp", "" },

	/* <style>, <script>, comments and (with SKIP_QUOTED) <blockquote>
	   contents are expected to be dropped; similarly named tags such as
	   <stylea>/<scripta> keep their content */
	{ "a<style>stylesheet is ignored</style>b",
	  "a b" },
	{ "a<stylea>b</stylea>c",
	  "a b c" },
	{ "a<!--x <p foo=\"bar\">commented tags ignored also</p> y-->b",
	  "ab" },
	{ "a<script>javascript <p>foo</p> ignored</script>b",
	  "a b" },
	{ "a<scripta>b</scripta>c",
	  "a b c" },
	{ "a<blockquote><blockquote>second level</blockquote>ignored</blockquote>b",
	  "a b" },
	/* CDATA content is expected verbatim up to the closing "]]>" */
	{ "a<![CDATA[<style>]] >b</style>]]>c",
	  "a<style>]] >b</style>c" },

	/* inputs that end in the middle of a tag or CDATA section */
	{ "a<foo", "a" },
	{ "a<blockquote", "a" },
	{ "a<blockquote>foo</blockquote", "a " },
	{ "a<", "a" },
	{ "a<![CDATA[b", "ab" },
	{ "a<![CDATA[b]]", "ab" },

	/* numeric character references, expected as UTF-8 */
	{ "a&#228;", "a\xC3\xA4" },
	{ "a&#xe4;", "a\xC3\xA4" },
	{ "&#8364;", "\xE2\x82\xAC" },
	{ "&#deee;", "" }, // invalid codepoint
};
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen
/* Three levels of nested <blockquote>, used by test_mail_html2text() for the
   run where quoted text is not skipped. */
static const char *test_blockquote_input =
	"a"
	"<blockquote>b"
	"<blockquote>"
	"<blockquote>c</blockquote>"
	"d</blockquote>"
	"e</blockquote>"
	"f";
/* Text expected from the input above when the converter is created with
   flags 0. */
static const char *test_blockquote_output =
	"a b c d e f";
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen
c9141125278100269eb3a907c911afe78c46717cTimo Sirainenstatic void test_mail_html2text(void)
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen{
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen string_t *str = t_str_new(128);
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen struct mail_html2text *ht;
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen unsigned int i, j;
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen test_begin("mail_html2text()");
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen for (i = 0; i < N_ELEMENTS(tests); i++) {
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen ht = mail_html2text_init(MAIL_HTML2TEXT_FLAG_SKIP_QUOTED);
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen for (j = 0; tests[i].input[j] != '\0'; j++) {
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen unsigned char c = tests[i].input[j];
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen mail_html2text_more(ht, &c, 1, str);
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen }
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen test_assert_idx(strcmp(str_c(str), tests[i].output) == 0, i);
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen mail_html2text_deinit(&ht);
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen str_truncate(str, 0);
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen }
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen /* test without skipping quoted */
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen ht = mail_html2text_init(0);
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen mail_html2text_more(ht, (const void *)test_blockquote_input,
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen strlen(test_blockquote_input), str);
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen test_assert(strcmp(str_c(str), test_blockquote_output) == 0);
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen mail_html2text_deinit(&ht);
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen test_end();
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen}
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainenstatic void test_mail_html2text_random(void)
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen{
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen string_t *str = t_str_new(128);
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen struct mail_html2text *ht;
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen test_begin("mail_html2text() random");
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen for (unsigned int i = 0; i < 1000; i++) {
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen char valid_chars[] = { '0', 'a', '<', '>', '&', ';', '\\', '\'', '"', '/' };
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen unsigned char s[2];
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen ht = mail_html2text_init(0);
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen for (unsigned int i = 0; i < 100; i++) {
191153d1a5b0eb0c129139570e3aa5212f28d2acJosef 'Jeff' Sipek s[0] = valid_chars[i_rand_limit(N_ELEMENTS(valid_chars))];
191153d1a5b0eb0c129139570e3aa5212f28d2acJosef 'Jeff' Sipek s[1] = valid_chars[i_rand_limit(N_ELEMENTS(valid_chars))];
191153d1a5b0eb0c129139570e3aa5212f28d2acJosef 'Jeff' Sipek mail_html2text_more(ht, s, i_rand_minmax(1, 2), str);
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen }
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen mail_html2text_deinit(&ht);
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen str_truncate(str, 0);
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen }
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen test_end();
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen}
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen
c9141125278100269eb3a907c911afe78c46717cTimo Sirainenint main(void)
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen{
baf3e87e186453fda13bd21f7cbcb2efc8492e8bTimo Sirainen static void (*const test_functions[])(void) = {
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen test_mail_html2text,
0bbddb0182685d0bb12973d8263bdb66415ae37aTimo Sirainen test_mail_html2text_random,
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen NULL
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen };
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen return test_run(test_functions);
c9141125278100269eb3a907c911afe78c46717cTimo Sirainen}