test-unichar.c revision 7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9
7cb128dc4cae2a03a742f63ba7afee23c78e3af0Phil Carmody/* Copyright (c) 2007-2015 Dovecot authors, see the included COPYING file */
86763e4c68313df83606e1060fc1795280590677Timo Sirainen
86763e4c68313df83606e1060fc1795280590677Timo Sirainen#include "test-lib.h"
86763e4c68313df83606e1060fc1795280590677Timo Sirainen#include "str.h"
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz#include "buffer.h"
86763e4c68313df83606e1060fc1795280590677Timo Sirainen#include "unichar.h"
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody#include <stdlib.h>
86763e4c68313df83606e1060fc1795280590677Timo Sirainen
/* Exercises uni_utf8_strlen() and uni_utf8_strlen_n() on a buffer that holds
   two 2-byte UTF-8 sequences (C3 A4, twice), then an embedded NUL and an
   ASCII 'a'. */
static void test_unichar_uni_utf8_strlen(void)
{
	static const char utf8_text[] = "\xC3\xA4\xC3\xA4\0a";
	/* expected_chars[i] is the count expected for a byte limit of i + 1.
	   Limits 1 and 3 end in the middle of a 2-byte sequence; the
	   expected counts leave that partial sequence out. */
	static const unsigned int expected_chars[] = { 0, 1, 1, 2 };
	size_t i;

	test_begin("uni_utf8_strlen()");
	/* only the two characters in front of the embedded NUL are expected
	   to be counted */
	test_assert(uni_utf8_strlen(utf8_text) == 2);
	test_end();

	test_begin("uni_utf8_strlen_n()");
	for (i = 0; i < sizeof(expected_chars) / sizeof(expected_chars[0]); i++)
		test_assert(uni_utf8_strlen_n(utf8_text, i + 1) == expected_chars[i]);
	test_end();
}
32ae620015da6ab2ec28e04d3cdcdb4420f1fa6bTimo Sirainen
/* Exercises uni_utf8_partial_strlen_n() for byte limits 1..6 over a buffer
   holding two 2-byte UTF-8 sequences, an embedded NUL and an ASCII 'a'.
   For every limit both the returned character count and the position
   written through the third argument are checked. */
static void test_unichar_uni_utf8_partial_strlen_n(void)
{
	static const char utf8_text[] = "\xC3\xA4\xC3\xA4\0a";
	/* Entry [limit - 1] holds what is expected for that byte limit. */
	static const struct {
		unsigned int chars;
		size_t pos;
	} expected[] = {
		{ 0, 0 },	/* 1 byte: first sequence still incomplete */
		{ 1, 2 },	/* 2 bytes: first sequence complete */
		{ 1, 2 },	/* 3 bytes: second sequence still incomplete */
		{ 2, 4 },	/* 4 bytes: both sequences complete */
		{ 3, 5 },	/* 5 bytes: the embedded NUL is included */
		{ 4, 6 }	/* 6 bytes: the trailing 'a' is included */
	};
	size_t limit, end_pos;

	test_begin("uni_utf8_partial_strlen_n()");
	for (limit = 1; limit <= sizeof(expected) / sizeof(expected[0]); limit++) {
		/* the position is only compared when the count matched */
		test_assert(uni_utf8_partial_strlen_n(utf8_text, limit, &end_pos) ==
			    expected[limit - 1].chars &&
			    end_pos == expected[limit - 1].pos);
	}
	test_end();
}
f66c8939c39e6bcd9dd5482bfd9689bd177ce0d4Timo Sirainen
86763e4c68313df83606e1060fc1795280590677Timo Sirainenvoid test_unichar(void)
86763e4c68313df83606e1060fc1795280590677Timo Sirainen{
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz static const char overlong_utf8[] = "\xf8\x80\x95\x81\xa1";
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz static const char collate_in[] = "\xc3\xbc \xc2\xb3";
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz static const char collate_exp[] = "U\xcc\x88 3";
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz buffer_t *collate_out;
86763e4c68313df83606e1060fc1795280590677Timo Sirainen unichar_t chr, chr2;
86763e4c68313df83606e1060fc1795280590677Timo Sirainen string_t *str = t_str_new(16);
86763e4c68313df83606e1060fc1795280590677Timo Sirainen
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody test_begin("unichars encode/decode");
86763e4c68313df83606e1060fc1795280590677Timo Sirainen for (chr = 0; chr <= 0x10ffff; chr++) {
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody /* The bottom 6 bits should be irrelevant to code coverage,
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody only test 000000, 111111, and something in between. */
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody if ((chr & 63) == 1)
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody chr += rand() % 62; /* After 0, somewhere between 1 and 62 */
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody else if ((chr & 63) > 0 && (chr & 63) < 63)
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody chr |= 63; /* After random, straight to 63 */
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody
86763e4c68313df83606e1060fc1795280590677Timo Sirainen str_truncate(str, 0);
86763e4c68313df83606e1060fc1795280590677Timo Sirainen uni_ucs4_to_utf8_c(chr, str);
86763e4c68313df83606e1060fc1795280590677Timo Sirainen test_assert(uni_utf8_str_is_valid(str_c(str)));
86763e4c68313df83606e1060fc1795280590677Timo Sirainen test_assert(uni_utf8_get_char(str_c(str), &chr2) > 0);
86763e4c68313df83606e1060fc1795280590677Timo Sirainen test_assert(chr2 == chr);
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody if ((chr & 0x63) == 0) {
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody unsigned int utf8len = uni_utf8_char_bytes(*str_c(str));
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody /* virtually truncate the byte string */
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody while (--utf8len > 0)
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody test_assert(uni_utf8_get_char_n(str_c(str), utf8len, &chr2) == 0);
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody utf8len = uni_utf8_char_bytes(*str_c(str));
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody /* actually truncate the byte stream */
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody while (--utf8len > 0) {
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody str_truncate(str, utf8len);
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody test_assert(!uni_utf8_str_is_valid(str_c(str)));
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody test_assert(uni_utf8_get_char(str_c(str), &chr2) == 0);
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody }
7c1b72e1a0b35aa27d4f74e9dd49fc3156eb4fa9Phil Carmody }
86763e4c68313df83606e1060fc1795280590677Timo Sirainen }
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody test_end();
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz
5fea87f050d4a6a8c8a3b259a25541fc78e5f2cdPhil Carmody test_begin("unichar collation");
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz collate_out = buffer_create_dynamic(default_pool, 32);
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz uni_utf8_to_decomposed_titlecase(collate_in, sizeof(collate_in),
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz collate_out);
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz test_assert(!strcmp(collate_out->data, collate_exp));
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz buffer_free(&collate_out);
9281d385594c4f158958977f1ead1d2270ef1a9eFlorian Zeitz
86763e4c68313df83606e1060fc1795280590677Timo Sirainen test_assert(!uni_utf8_str_is_valid(overlong_utf8));
86763e4c68313df83606e1060fc1795280590677Timo Sirainen test_assert(uni_utf8_get_char(overlong_utf8, &chr2) < 0);
86763e4c68313df83606e1060fc1795280590677Timo Sirainen test_end();
f66c8939c39e6bcd9dd5482bfd9689bd177ce0d4Timo Sirainen
32ae620015da6ab2ec28e04d3cdcdb4420f1fa6bTimo Sirainen test_unichar_uni_utf8_strlen();
f66c8939c39e6bcd9dd5482bfd9689bd177ce0d4Timo Sirainen test_unichar_uni_utf8_partial_strlen_n();
86763e4c68313df83606e1060fc1795280590677Timo Sirainen}