fts-build-mail.c revision 5518182f1165884742f9eb37eb2e6136b29394e8
/* Copyright (c) 2006-2015 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "istream.h"
#include "buffer.h"
#include "str.h"
#include "rfc822-parser.h"
#include "message-address.h"
#include "message-parser.h"
#include "message-decoder.h"
#include "mail-storage.h"
#include "fts-parser.h"
#include "fts-api-private.h"
#include "fts-build-mail.h"
/* Approximate word-separator test: only space, tab and newline count.
   Other whitespace characters exist, but exactness isn't required here.
   Note that the argument is evaluated more than once. */
#define IS_WORD_WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
/* Maximum size of a single word. If we see a word larger than this, just
   go ahead and split it at an arbitrary position. */
#define MAX_WORD_SIZE 1024
/* State shared by the mail-building helpers in this file. */
struct fts_mail_build_context {
	/* backend update context this build belongs to */
	struct fts_backend_update_context *update_ctx;

	/* assigned while parsing a header line
	   (NOTE(review): presumably the Content-Type value - confirm) */
	char *content_type;
	/* NOTE(review): presumably the Content-Disposition value - confirm */
	char *content_disposition;

	/* parser used to extract text from a body part, when one is in use */
	struct fts_parser *body_parser;
};
const struct message_header_line *hdr)
{
struct rfc822_parser_context parser;
T_BEGIN {
ctx->content_type =
}
} T_END;
}
static void
const struct message_header_line *hdr)
{
/* just pass it as-is to backend. */
}
const struct message_block *raw_block)
{
}
static void
const struct message_header_line *hdr)
{
unsigned int i;
/* @UNSAFE: if there are any NULs, replace them with spaces */
for (i = 0; i < hdr->full_value_len; i++) {
if (data[i] == '\0') {
}
buf[i] = ' ';
}
}
}
const struct message_block *block)
{
struct fts_backend_build_key key;
return;
/* hdr->full_value is always set because we get the block from
message_decoder */
return;
/* regular unstructured header */
} else T_BEGIN {
/* message address. normalize it to give better
search results. */
struct message_address *addr;
} T_END;
}
static bool
{
struct mail_storage *storage;
const char *content_type;
struct fts_backend_build_key key;
*binary_body_r = FALSE;
/* multiparts are never indexed, only their contents */
return FALSE;
}
&ctx->body_parser)) {
/* extract text using the returned parser */
*binary_body_r = TRUE;
/* text body parts */
} else {
/* possibly binary */
return FALSE;
*binary_body_r = TRUE;
}
}
{
unsigned int i;
FTS_BACKEND_FLAG_BUILD_FULL_WORDS) == 0) {
}
/* we'll need to send only full words to the backend */
/* continuing previous word */
break;
}
!last) {
/* word is still not finished */
return 0;
}
/* we have a full word, index it */
return -1;
}
/* find the boundary for last word */
if (last)
else {
break;
}
}
return -1;
}
}
return 0;
}
{
struct message_block block;
int ret = 0;
do {
ret = -1;
break;
}
ret = -1;
return ret;
}
static int
{
struct fts_mail_build_context ctx;
struct message_parser_ctx *parser;
struct message_decoder_context *decoder;
bool binary_body;
int ret;
return 0;
i_error("Failed to read mailbox %s mail UID=%u stream: %s",
return -1;
}
0);
for (;;) {
if (ret < 0) {
if (input->stream_errno == 0)
ret = 0;
break;
}
/* body part changed. we're now parsing the end of
boundary, possibly followed by message epilogue */
if (fts_body_parser_finish(&ctx) < 0) {
ret = -1;
break;
}
}
/* multipart. skip until beginning of next
part's headers */
}
}
/* always handle headers */
/* end of headers */
&binary_body);
if (binary_body)
} else {
if (skip_body)
continue;
}
&block))
continue;
/* end of headers */
} else {
ret = -1;
break;
}
body_added = TRUE;
}
}
if (ret == 0)
else
}
/* make sure body is added even when it doesn't exist */
}
}
{
int ret;
T_BEGIN {
} T_END;
return ret;
}