bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2007-2018 Dovecot authors, see the included COPYING file */
32ae620015da6ab2ec28e04d3cdcdb4420f1fa6bTimo Sirainen static const char input[] = "\xC3\xA4\xC3\xA4\0a";
32ae620015da6ab2ec28e04d3cdcdb4420f1fa6bTimo Sirainen test_assert(uni_utf8_strlen_n(input, 1) == 0);
32ae620015da6ab2ec28e04d3cdcdb4420f1fa6bTimo Sirainen test_assert(uni_utf8_strlen_n(input, 2) == 1);
32ae620015da6ab2ec28e04d3cdcdb4420f1fa6bTimo Sirainen test_assert(uni_utf8_strlen_n(input, 3) == 1);
32ae620015da6ab2ec28e04d3cdcdb4420f1fa6bTimo Sirainen test_assert(uni_utf8_strlen_n(input, 4) == 2);
f66c8939c39e6bcd9dd5482bfd9689bd177ce0d4Timo Sirainenstatic void test_unichar_uni_utf8_partial_strlen_n(void)
32ae620015da6ab2ec28e04d3cdcdb4420f1fa6bTimo Sirainen static const char input[] = "\xC3\xA4\xC3\xA4\0a";
f66c8939c39e6bcd9dd5482bfd9689bd177ce0d4Timo Sirainen test_assert(uni_utf8_partial_strlen_n(input, 1, &pos) == 0 && pos == 0);
f66c8939c39e6bcd9dd5482bfd9689bd177ce0d4Timo Sirainen test_assert(uni_utf8_partial_strlen_n(input, 2, &pos) == 1 && pos == 2);
f66c8939c39e6bcd9dd5482bfd9689bd177ce0d4Timo Sirainen test_assert(uni_utf8_partial_strlen_n(input, 3, &pos) == 1 && pos == 2);
f66c8939c39e6bcd9dd5482bfd9689bd177ce0d4Timo Sirainen test_assert(uni_utf8_partial_strlen_n(input, 4, &pos) == 2 && pos == 4);
32ae620015da6ab2ec28e04d3cdcdb4420f1fa6bTimo Sirainen test_assert(uni_utf8_partial_strlen_n(input, 5, &pos) == 3 && pos == 5);
32ae620015da6ab2ec28e04d3cdcdb4420f1fa6bTimo Sirainen test_assert(uni_utf8_partial_strlen_n(input, 6, &pos) == 4 && pos == 6);
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xc3\x28", FALSE, 0x0 }, /* has invalid 2nd octet */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xa0\xa1", FALSE, 0x0 }, /* invalid sequence identifier */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xed\xa0\x80", FALSE, 0x0 }, /* surrogate halves, U+D800 .. */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xe2\x28\xa1", FALSE, 0x0 }, /* invalid 2nd octet */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xe2\x82\x28", FALSE, 0x0 }, /* invalid 3rd octet */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xf0\x90\x8c\xbc", TRUE, 0x1033C }, /* U+1033C */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xf0\x28\x8c\xbc", FALSE, 0x0 }, /*invalid 2nd octet*/
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xf0\x90\x28\xbc", FALSE, 0x0 }, /* invalid 3rd octet */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xf0\x28\x8c\x28", FALSE, 0x0 }, /* invalid 4th octet */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xf4\x80\x80\x80", TRUE, 0x100000 }, /* U+100000, supplementary plane start */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xf4\x8f\xbf\xbf", TRUE, 0x10FFFF }, /* U+10FFFF, maximum value */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xf8\xa1\xa1\xa1\xa1", FALSE, 0x0 }, /* invalid unicode */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi { "\xfc\xa1\xa1\xa1\xa1\xa1", FALSE, 0x0 }, /* invalid unicode */
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi for(size_t i = 0; i < N_ELEMENTS(test_cases); i++) {
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi test_assert_idx(uni_utf8_get_char(test_cases[i].input, &chr) > 0, i);
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi test_assert_idx(test_cases[i].expected == chr, i);
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi test_assert_idx(uni_utf8_get_char(test_cases[i].input, &chr) < 1, i);
9b26e0815de529575947efcf62b90d6e220d83a6Aki Tuomi test_assert(uni_join_surrogate(high, low) == orig);
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz static const char overlong_utf8[] = "\xf8\x80\x95\x81\xa1";
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz static const char collate_in[] = "\xc3\xbc \xc2\xb3";
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz static const char collate_exp[] = "U\xcc\x88 3";
e7e451b7d9339883db29bad79df5256f9b410652Aki Tuomi /* skip surrogates */
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody /* The bottom 6 bits should be irrelevant to code coverage,
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody only test 000000, 111111, and something in between. */
191153d1a5b0eb0c129139570e3aa5212f28d2acJosef 'Jeff' Sipek chr += i_rand_limit(62); /* After 0, somewhere between 1 and 62 */
86763e4c68313df83606e1060fc1795280590677Timo Sirainen test_assert(uni_utf8_str_is_valid(str_c(str)));
2c8b34a5fb85c2f6711f603efc934512af758654Timo Sirainen test_assert(uni_utf8_get_char(str_c(str), &chr2) == (int)uni_utf8_char_bytes(*str_data(str)));
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody unsigned int utf8len = uni_utf8_char_bytes(*str_c(str));
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody /* virtually truncate the byte string */
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody while (--utf8len > 0)
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody test_assert(uni_utf8_get_char_n(str_c(str), utf8len, &chr2) == 0);
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody /* actually truncate the byte stream */
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody while (--utf8len > 0) {
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody test_assert(!uni_utf8_str_is_valid(str_c(str)));
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody test_assert(uni_utf8_get_char(str_c(str), &chr2) == 0);
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz collate_out = buffer_create_dynamic(default_pool, 32);
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz uni_utf8_to_decomposed_titlecase(collate_in, sizeof(collate_in),
23bdbb7b1831785c6ba6df190f6369da882d2b9dTimo Sirainen test_assert(strcmp(collate_out->data, collate_exp) == 0);
86763e4c68313df83606e1060fc1795280590677Timo Sirainen test_assert(!uni_utf8_str_is_valid(overlong_utf8));