bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
74ab5ea66c0c4b388f1c774ae6a47ab94f1b4f18Timo Sirainenstatic void result_print(ARRAY_TYPE(seq_range) *result)
74ab5ea66c0c4b388f1c774ae6a47ab94f1b4f18Timo Sirainen for (i = 0; i < count; i++) {
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen const char *trie_path = "/tmp/squat-test-index.search";
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen const char *uidlist_path = "/tmp/squat-test-index.search.uids";
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen ARRAY_TYPE(seq_range) definite_uids, maybe_uids;
2ac5f36aa7c2e7a07ba8815d43a6d7483f62e74cTimo Sirainen unsigned int last = 0, seq = 1, node_count, uidlist_count;
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen bool data_header = TRUE, first = TRUE, skip_body = FALSE;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (squat_trie_build_init(trie, &build_ctx) < 0)
087b8ac8c65a5e96a95da506450a91e8f52ae4eeTimo Sirainen valid = buffer_create_dynamic(default_pool, 4096);
e93184a9055c2530366dfe617e07199603c399ddMartti Rannanjärvi input = i_stream_create_fd(fd, (size_t)-1);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen while (ret == 0 && (line = i_stream_read_next_line(input)) != NULL) {
74ab5ea66c0c4b388f1c774ae6a47ab94f1b4f18Timo Sirainen fprintf(stderr, "\r%ukB", (unsigned)(input->v_offset/1024));
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen if (strncasecmp(line, "Content-Type:", 13) == 0 &&
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen strncasecmp(line, "Content-Type: text/", 19) != 0 &&
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen strncasecmp(line, "Content-Type: message/", 22) != 0)
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen else if (strncasecmp(line, "Content-Transfer-Encoding: base64", 33) == 0)
ec83cfdc3fbcb9cbd55110e37783946a4bc0b656Timo Sirainen /* we're actually indexing here headers as bodies and bodies
ec83cfdc3fbcb9cbd55110e37783946a4bc0b656Timo Sirainen as headers. it doesn't really matter in this test, and
ec83cfdc3fbcb9cbd55110e37783946a4bc0b656Timo Sirainen fixing it would require storing headers temporarily
ec83cfdc3fbcb9cbd55110e37783946a4bc0b656Timo Sirainen elsewhere and index them only after the body */
ec83cfdc3fbcb9cbd55110e37783946a4bc0b656Timo Sirainen index_type = !data_header ? SQUAT_INDEX_TYPE_HEADER :
2a8e2b9e3d92fd6449aa576369a5eb8b6b2b9c14Timo Sirainen if (uni_utf8_get_valid_data((const unsigned char *)line,
2a8e2b9e3d92fd6449aa576369a5eb8b6b2b9c14Timo Sirainen ret = squat_trie_build_more(build_ctx, seq, index_type,
087b8ac8c65a5e96a95da506450a91e8f52ae4eeTimo Sirainen ret = squat_trie_build_more(build_ctx, seq, index_type,
1caf757864e7734345660e7d190f84e42668a6f8Timo Sirainen if (squat_trie_build_deinit(&build_ctx, NULL) < 0)
74ab5ea66c0c4b388f1c774ae6a47ab94f1b4f18Timo Sirainen cputime = (double)(clock_end - clock_start) / CLOCKS_PER_SEC;
74ab5ea66c0c4b388f1c774ae6a47ab94f1b4f18Timo Sirainen fprintf(stderr, "\n - Index time: %.2f CPU seconds, "
74ab5ea66c0c4b388f1c774ae6a47ab94f1b4f18Timo Sirainen "%.2f real seconds (%.02fMB/CPUs)\n", cputime,
7ef3553585e556f35d5919589cfdc1de3329e4bbTimo Sirainen timeval_diff_msecs(&tv_end, &tv_start)/1000.0,
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen trie_mem = squat_trie_mem_used(trie, &node_count);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen uidlist_mem = squat_uidlist_mem_used(squat_trie_get_uidlist(trie),
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen fprintf(stderr, " - memory: %uk for trie, %uk for uidlist\n",
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen (unsigned)(trie_mem/1024), (unsigned)(uidlist_mem/1024));
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen fprintf(stderr, " - %"PRIuUOFF_T" bytes in %u nodes (%.02f%%)\n",
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen trie_st.st_size / (float)input->v_offset * 100.0);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen fprintf(stderr, " - %"PRIuUOFF_T" bytes in %u UID lists (%.02f%%)\n",
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen uidlist_st.st_size / (float)input->v_offset * 100.0);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen fprintf(stderr, " - %"PRIuUOFF_T" bytes total of %"
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen (trie_st.st_size + uidlist_st.st_size), input->v_offset,
74ab5ea66c0c4b388f1c774ae6a47ab94f1b4f18Timo Sirainen while ((str = fgets(buf, sizeof(buf), stdin)) != NULL) {
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen ret = squat_trie_lookup(trie, str, SQUAT_INDEX_TYPE_HEADER |