/* Copyright (c) 2016-2018 Dovecot authors, see the included COPYING file */
14a7cd46677cc0052319f2cd84a7b720efa60499Aki Tuomi
14a7cd46677cc0052319f2cd84a7b720efa60499Aki Tuomi#include "lib.h"
14a7cd46677cc0052319f2cd84a7b720efa60499Aki Tuomi#include "unichar.h"
14a7cd46677cc0052319f2cd84a7b720efa60499Aki Tuomi#include "fts-tokenizer-common.h"
/* Trim a truncated token so it does not end in the middle of a character.

   data points to the token bytes and *len holds the token length in bytes.
   The function scans backwards from the last byte to the nearest byte that
   UTF8_IS_START_SEQ() accepts (stopping at offset 0 at the latest) and
   compares the length uni_utf8_char_bytes() reports for that byte with the
   number of bytes remaining in the token. If they differ, *len is set to
   the offset of that byte, dropping the incomplete tail. An empty token
   (*len == 0) is left untouched. */
void
fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
					   size_t *len)
{
	size_t pos;
	unsigned int char_bytes;

	/* nothing to trim - this also keeps *len-1 below from wrapping
	   around and reading far outside the buffer */
	if (*len == 0)
		return;

	/* the token is truncated - make sure the last character
	   exists entirely in the token */
	for (pos = *len-1; pos > 0; pos--) {
		if (UTF8_IS_START_SEQ(data[pos]))
			break;
	}
	char_bytes = uni_utf8_char_bytes(data[pos]);
	if (char_bytes != *len-pos) {
		/* the tail is expected to be shorter than the character
		   needs, never longer */
		i_assert(char_bytes > *len-pos);
		*len = pos;
	}
}
/* Strip trailing '.' and '-' bytes from a token.

   data points to the token bytes and *len holds the token length in bytes.
   On return *len is the length of the token with every trailing '.' and
   '-' removed; it becomes 0 if the token consists only of those bytes.
   The bytes in data are not modified. */
void fts_tokenizer_delete_trailing_invalid_char(const unsigned char *data,
						 size_t *len)
{
	size_t pos = *len;

	/* the token may contain '.' or '-' in the end - remove all of them. */
	while (pos > 0 &&
	       (data[pos-1] == '.' || data[pos-1] == '-'))
		pos--;
	*len = pos;
}
14a7cd46677cc0052319f2cd84a7b720efa60499Aki Tuomi