test-fts-language.c revision c5effa0f13da8f45991c89a9d8c9d2109db66039
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* Copyright (c) 2014-2015 Dovecot authors, see the included COPYING file */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* TODO: These checks will not work without a proper libtextcat configuration.
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen As such, they are not really unit tests to be coupled with the build. */
/* Settings array passed as the first argument of fts_language_list_init()
   by the tests in this file. The array is NULL-terminated; its entries look
   like alternating setting-name / value strings (TODO confirm against
   fts_language_list_init()). Both values are paths built by literal
   concatenation with TEXTCAT_DATADIR, a macro defined outside this excerpt. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenconst char *const settings[] =
4e07da7f29d35d1517fce9b7300c6c19f804325bTimo Sirainen {"fts_language_config", TEXTCAT_DATADIR"/fpdb.conf",
4e07da7f29d35d1517fce9b7300c6c19f804325bTimo Sirainen "fts_language_data", TEXTCAT_DATADIR"/", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* Detect Finnish. fi--utf8 */
/* NOTE(review): this excerpt does not show the function's braces, its local
   declarations (lp, lang_r, names, unknown, error) or the right-hand side of
   the final test_assert(), so the expected detection result is not visible
   here. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_language_detect_finnish(void)
/* Sample text as raw UTF-8 bytes; \xC3\xA4 is the two-byte UTF-8 encoding of
   U+00E4 (a with diaeresis). */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const unsigned char finnish[] =
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Yhdistyneiden kansakuntien kolmas yleiskokous hyv\xC3\xA4ksyi "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "ja julkisti ihmisoikeuksien yleismaailmallisen julistuksen "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "joulukuun 10. p\xC3\xA4iv\xC3\xA4n\xC3\xA4 1948. Julistuksen "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "hyv\xC3\xA4ksymisen puolesta \xC3\xA4\xC3\xA4nesti 48 maata. "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Mik\xC3\xA4\xC3\xA4n maa ei \xC3\xA4\xC3\xA4nest\xC3\xA4nyt "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "vastaan. Kahdeksan maata pid\xC3\xA4ttyi "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "\xC3\xA4\xC3\xA4nest\xC3\xA4m\xC3\xA4st\xC3\xA4.";
/* Create the language list from the file-level settings, add the languages
   named in `names`, then run detection on the sample. sizeof(finnish)-1
   leaves the string literal's terminating NUL out of the text length. */
a9b3887f4d9ed75a76fed964c1930432bf84f4f5Timo Sirainen test_assert(fts_language_list_init(settings, &lp, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_detect(lp, finnish, sizeof(finnish)-1, &lang_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* Detect English */
/* NOTE(review): braces, local declarations and the right-hand side of the
   final test_assert() are not part of this excerpt, so the expected result
   is not visible here. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_language_detect_english(void)
/* Plain-ASCII English sample.
   NOTE(review): several adjacent literals are joined without a separating
   space, so the resulting text contains "humanfamily", "conscienceof",
   "beingsshall" and "aspirationof". Confirm whether that is intended before
   relying on this text as clean English input. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const unsigned char english[] = "Whereas recognition of the inherent dignity and"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen " of the equal and inalienable rights of all members of the human"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "family is the foundation of freedom, justice and peace in the "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "world,\n Whereas disregard and contempt for human rights have "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "resulted in barbarous acts which have outraged the conscience"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "of mankind, and the advent of a world in which human beings"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "shall enjoy freedom of speech and belief and freedom from "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "fear and want has been proclaimed as the highest aspiration"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "of the common people, ";
/* Create the language list from the file-level settings, add the languages
   named in `names`, then run detection on the sample; the length argument
   excludes the literal's terminating NUL. */
a9b3887f4d9ed75a76fed964c1930432bf84f4f5Timo Sirainen test_assert(fts_language_list_init(settings, &lp, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_detect(lp, english, sizeof(english)-1, &lang_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* Detect French */
/* NOTE(review): braces, local declarations and the right-hand side of the
   final test_assert() are not part of this excerpt, so the expected result
   is not visible here. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_language_detect_french(void)
/* Sample text as raw UTF-8 bytes (\xC3\xA9 = U+00E9, \xE2\x80\x99 = U+2019
   right single quotation mark, and so on).
   The "" splits such as "D\xC3\xA9""claration" end the string literal right
   after a hex escape: a \x escape consumes every following hex digit, so a
   letter like c, a, b or d directly after it would otherwise be absorbed
   into the escape. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const unsigned char french[] =
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "D\xC3\xA9""claration universelle des droits de l\xE2\x80\x99"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "homme Pr\xC3\xA9""ambule Consid\xC3\xA9rant que la "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "reconnaissance de la dignit\xC3\xA9 inh\xC3\xA9rente \xC3\xA0"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen " tous les membres de la famille humaine et de leurs droits "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "\xC3\xA9gaux et inali\xC3\xA9nables constitue le fondement de"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen " la libert\xC3\xA9, de la justice et de la paix dans le monde,"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen " Consid\xC3\xA9rant que la m\xC3\xA9""connaissance et le "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "m\xC3\xA9pris des droits de l\xE2\x80\x99homme ont conduit "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "\xC3\xA0 des actes de barbarie qui r\xC3\xA9voltent la "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "conscience de l\xE2\x80\x99humanit\xC3\xA9 et que "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "l\xE2\x80\x99""av\xC3\xA8nement d\xE2\x80\x99un monde o\xC3\xB9"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen " les \xC3\xAAtres humains seront libres de parler et de "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "croire, lib\xC3\xA9r\xC3\xA9s de la terreur et de la "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "mis\xC3\xA8re, a \xC3\xA9t\xC3\xA9 proclam\xC3\xA9 comme la "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "plus haute aspiration de l\xE2\x80\x99homme,";
/* Create the language list from the file-level settings, add the languages
   named in `names`, then run detection on the sample; the length argument
   excludes the literal's terminating NUL. */
a9b3887f4d9ed75a76fed964c1930432bf84f4f5Timo Sirainen test_assert(fts_language_list_init(settings, &lp, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_detect(lp, french, sizeof(french)-1, &lang_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* Detect German */
/* NOTE(review): braces, local declarations and the right-hand side of the
   final test_assert() are not part of this excerpt, so the expected result
   is not visible here. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_language_detect_german(void)
/* Sample text as raw UTF-8 bytes (\xC3\xBC = U+00FC, \xC3\xA4 = U+00E4,
   \xC3\xB6 = U+00F6, \xC3\x9C = U+00DC). The "" splits after some escapes
   keep a following hex-digit letter (d, b) out of the \x escape.
   NOTE(review): two literal joins have no separating space, producing
   "begegnen.Artikel" and "ohneirgendeinen" in the resulting text; confirm
   whether that is intended. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const unsigned char german[] =
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Alle Menschen sind frei und gleich an W\xC3\xBCrde und Rechten "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "geboren. Sie sind mit Vernunft und Gewissen begabt und sollen "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "einander im Geist der Br\xC3\xBC""derlichkeit begegnen." \
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Artikel 2 Jeder hat Anspruch auf die in dieser "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Erkl\xC3\xA4rung verk\xC3\xBCndeten Rechte und Freiheiten ohne"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "irgendeinen Unterschied, etwa nach Rasse, Hautfarbe, "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Geschlecht, Sprache, Religion, politischer oder sonstiger "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "\xC3\x9C""berzeugung, nationaler oder sozialer Herkunft, "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Verm\xC3\xB6gen, Geburt oder sonstigem Stand. Des weiteren "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "darf kein Unterschied gemacht werden auf Grund der "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "politischen, rechtlichen oder internationalen Stellung des "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Landes oder Gebiets, dem eine Person angeh\xC3\xB6rt, "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "gleichg\xC3\xBCltig, ob dieses unabh\xC3\xA4ngig ist, unter "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Treuhandschaft steht, keine Selbstregierung besitzt oder "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "sonst in seiner Souver\xC3\xA4nit\xC3\xA4t "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "eingeschr\xC3\xA4nkt ist.";
/* Create the language list from the file-level settings, add the languages
   named in `names`, then run detection on the sample; the length argument
   excludes the literal's terminating NUL. */
a9b3887f4d9ed75a76fed964c1930432bf84f4f5Timo Sirainen test_assert(fts_language_list_init(settings, &lp, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_detect(lp, german, sizeof(german)-1, &lang_r)
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila/* Detect Swedish */
/* NOTE(review): braces, local declarations and the right-hand side of the
   final test_assert() are not part of this excerpt, so the expected result
   is not visible here. */
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovilastatic void test_fts_language_detect_swedish(void)
/* Sample text as raw UTF-8 bytes (\xC3\xA4 = U+00E4, \xC3\xB6 = U+00F6). The
   "" split in "f\xC3\xB6""dda" keeps the following 'd' out of the \x escape.
   NOTE(review): "Artikel 1." and "Alla" are joined without a space, so the
   text starts with "Artikel 1.Alla"; confirm whether that is intended. */
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila const unsigned char swedish[] =
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "Artikel 1."\
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "Alla m\xC3\xA4nniskor \xC3\xA4ro f\xC3\xB6""dda fria och lika"\
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila " i v\xC3\xA4rde och r\xC3\xA4ttigheter. De \xC3\xA4ro "\
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "utrustade med f\xC3\xB6rnuft och samvete och b\xC3\xB6ra "\
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "handla gentemot varandra i en anda av broderskap.";
/* Create the language list from the file-level settings, add the languages
   named in `names`, then run detection on the sample; the length argument
   excludes the literal's terminating NUL. */
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(fts_language_list_init(settings, &lp, &error) == 0);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(fts_language_detect(lp, swedish, sizeof(swedish)-1, &lang_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* Detect Finnish as English */
/* NOTE(review): braces, local declarations (including the `names` list that
   decides which languages are candidates) and the right-hand side of the
   final test_assert() are not part of this excerpt. The language list and
   expected result that make Finnish text come out "as English" therefore
   cannot be confirmed from here. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_language_detect_finnish_as_english(void)
/* Finnish sample as raw UTF-8 bytes; byte-for-byte the same text as the
   sample in test_fts_language_detect_finnish(). */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const unsigned char finnish[] =
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Yhdistyneiden kansakuntien kolmas yleiskokous hyv\xC3\xA4ksyi "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "ja julkisti ihmisoikeuksien yleismaailmallisen julistuksen "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "joulukuun 10. p\xC3\xA4iv\xC3\xA4n\xC3\xA4 1948. Julistuksen "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "hyv\xC3\xA4ksymisen puolesta \xC3\xA4\xC3\xA4nesti 48 maata. "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Mik\xC3\xA4\xC3\xA4n maa ei \xC3\xA4\xC3\xA4nest\xC3\xA4nyt "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "vastaan. Kahdeksan maata pid\xC3\xA4ttyi "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "\xC3\xA4\xC3\xA4nest\xC3\xA4m\xC3\xA4st\xC3\xA4.";
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts language detect Finnish as English");
/* Create the language list from the file-level settings, add the languages
   named in `names`, then run detection on the sample; the length argument
   excludes the literal's terminating NUL. */
a9b3887f4d9ed75a76fed964c1930432bf84f4f5Timo Sirainen test_assert(fts_language_list_init(settings, &lp, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_detect(lp, finnish, sizeof(finnish)-1, &lang_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* Successfully avoid detecting English, when en is not in language list. */
/* NOTE(review): the function header line, braces, local declarations
   (including `names`) and the right-hand side of the final test_assert()
   are not part of this excerpt, so neither the function name nor the
   expected result is visible here. */
/* Plain-ASCII English sample; byte-for-byte the same text as the sample in
   test_fts_language_detect_english().
   NOTE(review): several adjacent literals are joined without a separating
   space, so the resulting text contains "humanfamily", "conscienceof",
   "beingsshall" and "aspirationof"; confirm whether that is intended. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const unsigned char english[] = "Whereas recognition of the inherent dignity and"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen " of the equal and inalienable rights of all members of the human"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "family is the foundation of freedom, justice and peace in the "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "world,\n Whereas disregard and contempt for human rights have "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "resulted in barbarous acts which have outraged the conscience"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "of mankind, and the advent of a world in which human beings"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "shall enjoy freedom of speech and belief and freedom from "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "fear and want has been proclaimed as the highest aspiration"\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "of the common people, ";
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts language detect not available");
/* Create the language list from the file-level settings, add the languages
   named in `names`, then run detection on the sample; the length argument
   excludes the literal's terminating NUL. */
a9b3887f4d9ed75a76fed964c1930432bf84f4f5Timo Sirainen test_assert(fts_language_list_init(settings, &lp, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_detect(lp, english, sizeof(english)-1, &lang_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* Successfully detect that Klingon is unknown. */
/* NOTE(review): braces, local declarations and the right-hand side of the
   final test_assert() are not part of this excerpt, so the exact expected
   result constant is not visible here. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_language_detect_unknown(void)
/* Plain-ASCII sample in a language that is not expected to match any
   configured language model. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const unsigned char klingon[] = "nobwI''a'pu'qoqvam'e' "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "nuHegh'eghrupqa'moHlaHbe'law'lI'neS "\
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "SeH'eghtaHghach'a'na'chajmo'.";
/* Create the language list from the file-level settings, add the languages
   named in `names`, then run detection on the sample. */
a9b3887f4d9ed75a76fed964c1930432bf84f4f5Timo Sirainen test_assert(fts_language_list_init(settings, &lp, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_list_add_names(lp, names, &unknown) == TRUE);
/* Pass sizeof(klingon)-1, as every other test in this file does, so the
   string literal's terminating NUL is not counted as part of the text. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(fts_language_detect(lp, klingon, sizeof(klingon)-1, &lang_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen static void (*test_functions[])(void) = {