/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */
178N/A
178N/A#include "lib.h"
178N/A#include "unichar.h"
178N/A#include "str.h"
178N/A#include "str-sanitize.h"
178N/A
178N/Astatic size_t str_sanitize_skip_start(const char *src, size_t max_bytes)
178N/A{
178N/A unichar_t chr;
178N/A size_t i;
178N/A
178N/A for (i = 0; i < max_bytes && src[i] != '\0'; ) {
178N/A int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
178N/A if (len <= 0)
178N/A break;
178N/A if ((unsigned char)src[i] < 32)
178N/A break;
178N/A i += len;
178N/A }
178N/A i_assert(i <= max_bytes);
178N/A return i;
178N/A}
178N/A
178N/Astatic void str_sanitize_truncate_char(string_t *dest, unsigned int initial_pos)
178N/A{
178N/A const unsigned char *data = str_data(dest);
178N/A size_t len = str_len(dest);
178N/A
178N/A if (len == initial_pos)
178N/A return;
178N/A
178N/A i_assert(len > 0);
178N/A if ((data[len-1] & 0x80) == 0) {
2015N/A str_truncate(dest, len-1);
178N/A return;
178N/A }
178N/A /* truncate UTF-8 sequence. */
178N/A while (len > 0 && (data[len-1] & 0xc0) == 0x80)
178N/A len--;
178N/A if (len > 0 && (data[len-1] & 0xc0) == 0xc0)
178N/A len--;
178N/A if (len >= initial_pos)
178N/A str_truncate(dest, len);
178N/A}
178N/A
178N/Avoid str_sanitize_append(string_t *dest, const char *src, size_t max_bytes)
178N/A{
178N/A size_t initial_pos = str_len(dest);
178N/A unichar_t chr;
178N/A size_t i;
178N/A
178N/A for (i = 0; i < max_bytes && src[i] != '\0'; ) {
178N/A int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
178N/A if (len == 0)
178N/A break; /* input ended too early */
178N/A
178N/A if (len < 0) {
178N/A /* invalid UTF-8 */
178N/A str_append_c(dest, '?');
178N/A i++;
continue;
}
if ((unsigned char)src[i] < 32)
str_append_c(dest, '?');
else
str_append_n(dest, src+i, len);
i += len;
}
if (src[i] != '\0') {
if (max_bytes < 3)
str_truncate(dest, initial_pos);
else {
while (str_len(dest) - initial_pos > max_bytes-3)
str_sanitize_truncate_char(dest, initial_pos);
}
str_append(dest, "...");
}
}
/* Return a sanitized version of src, looking at no more than max_bytes bytes
   of it.

   Returns NULL when src is NULL. When the initial scan reaches the
   terminating NUL without finding anything to replace or cut, src itself is
   returned and nothing is copied. Otherwise the result is built with
   str_sanitize_append() into a string created by t_str_new(), and that
   string's C-string view is returned. */
const char *str_sanitize(const char *src, size_t max_bytes)
{
	string_t *sanitized;

	if (src == NULL)
		return NULL;

	/* fast path: the scan stopped at the end of the string */
	if (src[str_sanitize_skip_start(src, max_bytes)] == '\0')
		return src;

	sanitized = t_str_new(I_MIN(max_bytes, 256));
	str_sanitize_append(sanitized, src, max_bytes);
	return str_c(sanitized);
}