/* Copyright (c) 2016-2018 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "unichar.h"
#include "fts-tokenizer-common.h"
/* Drop an incomplete trailing UTF-8 character from a truncated token.

   data points to the token bytes and *len is the token length in bytes.
   The last character's start byte is located by scanning backwards, and if
   the character length reported for it by uni_utf8_char_bytes() differs
   from the number of bytes remaining, *len is reduced so that the token
   ends right before that character. Otherwise *len is left unchanged.
   The bytes in data are never modified. An empty token (*len == 0) is
   returned as is. */
void
fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
					   size_t *len)
{
	size_t pos;
	unsigned int char_bytes;

	/* nothing to trim - this also keeps *len-1 below from wrapping
	   around to SIZE_MAX and reading outside the buffer */
	if (*len == 0)
		return;

	/* the token is truncated - make sure the last character
	   exists entirely in the token. walk backwards until a byte
	   matching UTF8_IS_START_SEQ() is found, stopping at index 0
	   at the latest. */
	for (pos = *len-1; pos > 0; pos--) {
		if (UTF8_IS_START_SEQ(data[pos]))
			break;
	}
	/* presumably the full byte length of the character that begins
	   with data[pos] - compare it against the bytes actually present */
	char_bytes = uni_utf8_char_bytes(data[pos]);
	if (char_bytes != *len-pos) {
		/* the tail is only ever expected to be too short, never too
		   long; this is asserted rather than handled */
		i_assert(char_bytes > *len-pos);
		*len = pos;
	}
}
/* Strip all trailing '.' and '-' bytes from the token by shrinking *len.

   data points to the token bytes and *len is the token length in bytes.
   The bytes in data are not modified. *len becomes 0 when the token
   consists solely of these characters. */
void fts_tokenizer_delete_trailing_invalid_char(const unsigned char *data,
						size_t *len)
{
	size_t remaining;

	/* shorten the token one byte at a time for as long as its last
	   byte is one of the characters to be removed */
	for (remaining = *len; remaining > 0; remaining--) {
		unsigned char last = data[remaining - 1];

		if (last != '.' && last != '-')
			break;
	}
	*len = remaining;
}