str-sanitize.c revision 6aadd1c52e6b291d47b47b4f4063e9bc8ccf0784
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen/* Copyright (c) 2004-2015 Dovecot authors, see the included COPYING file */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "lib.h"
bdd36cfdba3ff66d25570a9ff568d69e1eb543cfTimo Sirainen#include "unichar.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "str.h"
9c6a09aa16095ff72837799a37e0e3b3e93bb3d8Timo Sirainen#include "str-sanitize.h"
31a9637b38d37451b649c86301b2c12e53a7810eTimo Sirainen
9c6a09aa16095ff72837799a37e0e3b3e93bb3d8Timo Sirainenstatic size_t str_sanitize_skip_start(const char *src, size_t max_bytes)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen unichar_t chr;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen size_t i;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen for (i = 0; i < max_bytes && src[i] != '\0'; ) {
c6335901c67a4c9365319190a111a2168f3b06f5Timo Sirainen int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
c6335901c67a4c9365319190a111a2168f3b06f5Timo Sirainen if (len <= 0)
c6335901c67a4c9365319190a111a2168f3b06f5Timo Sirainen break;
01230de017cd273de41143d88e9c18df1243ae8aTimo Sirainen if ((unsigned char)src[i] < 32)
b7b9d4be2a1ff399026a5d6feeffd3a048f22be0Timo Sirainen break;
b7b9d4be2a1ff399026a5d6feeffd3a048f22be0Timo Sirainen i += len;
047c00cd3f7f403672f81569413669238df8c15aTimo Sirainen }
1f9d1bedae25d86f26c239055c5487499dfeeb58Timo Sirainen i_assert(i <= max_bytes);
047c00cd3f7f403672f81569413669238df8c15aTimo Sirainen return i;
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen}
f37ecd72aad9b806aae83f71bacafdce32146945Timo Sirainen
/* Remove the last character from dest, never shrinking it below
   initial_pos bytes. A trailing 7-bit byte is removed on its own;
   otherwise the trailing UTF-8 continuation bytes (10xxxxxx) and the lead
   byte (11xxxxxx) in front of them are removed together. Does nothing when
   dest is exactly initial_pos bytes long.

   Offsets are size_t, the same type the caller uses for max_bytes, so
   lengths are not narrowed to unsigned int. */
static void str_sanitize_truncate_char(string_t *dest, size_t initial_pos)
{
	const unsigned char *data = str_data(dest);
	size_t len = str_len(dest);

	if (len == initial_pos)
		return;
	if ((data[len-1] & 0x80) == 0) {
		/* single-byte character */
		str_truncate(dest, len-1);
		return;
	}
	/* truncate UTF-8 sequence: first step back over continuation bytes */
	while (len > 0 && (data[len-1] & 0xc0) == 0x80)
		len--;
	/* then over the lead byte that started the sequence */
	if (len > 0 && (data[len-1] & 0xc0) == 0xc0)
		len--;
	/* only cut if that keeps everything before initial_pos intact */
	if (len >= initial_pos)
		str_truncate(dest, len);
}

/* Append a sanitized copy of src to dest, reading at most max_bytes bytes
   of src.

   Each valid UTF-8 character is copied unchanged, except that a character
   whose lead byte is below 32 is written as '?'. Each byte that does not
   start a valid UTF-8 sequence is also written as a single '?'.

   If src was not consumed up to its terminating NUL, the appended text is
   cut back one whole character at a time until it is at most max_bytes-3
   bytes long, and "..." is added. When max_bytes < 3, everything appended
   by this call is dropped and only "..." is added, so the output can
   exceed max_bytes in that case. Bytes that were in dest before the call
   are left untouched. */
void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes)
{
	/* size_t (not unsigned int) so the offset is compared against
	   str_len() and max_bytes without narrowing */
	size_t initial_pos = str_len(dest);
	unichar_t chr;
	size_t i;

	for (i = 0; i < max_bytes && src[i] != '\0'; ) {
		int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
		if (len == 0)
			break; /* input ended too early */

		if (len < 0) {
			/* invalid UTF-8: replace just this byte and retry
			   decoding from the next one */
			str_append_c(dest, '?');
			i++;
			continue;
		}
		if ((unsigned char)src[i] < 32)
			str_append_c(dest, '?');
		else
			str_append_n(dest, src+i, len);
		i += len;
	}

	if (src[i] != '\0') {
		/* src was cut short: make room for the "..." marker */
		if (max_bytes < 3)
			str_truncate(dest, initial_pos);
		else {
			while (str_len(dest) - initial_pos > max_bytes-3)
				str_sanitize_truncate_char(dest, initial_pos);
		}
		str_append(dest, "...");
	}
}
541f258d86b2db26efd5670883966183b4fb6323Timo Sirainen
541f258d86b2db26efd5670883966183b4fb6323Timo Sirainenconst char *str_sanitize(const char *src, size_t max_bytes)
0dffa25d211be541ee3c953b23566a1a990789dfTimo Sirainen{
541f258d86b2db26efd5670883966183b4fb6323Timo Sirainen string_t *str;
541f258d86b2db26efd5670883966183b4fb6323Timo Sirainen size_t i;
0dffa25d211be541ee3c953b23566a1a990789dfTimo Sirainen
db693bf6fcae96d834567f1782257517b7207655Timo Sirainen if (src == NULL)
541f258d86b2db26efd5670883966183b4fb6323Timo Sirainen return NULL;
541f258d86b2db26efd5670883966183b4fb6323Timo Sirainen
0dffa25d211be541ee3c953b23566a1a990789dfTimo Sirainen i = str_sanitize_skip_start(src, max_bytes);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (src[i] == '\0')
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return src;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
db693bf6fcae96d834567f1782257517b7207655Timo Sirainen str = t_str_new(I_MIN(max_bytes, 256));
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen str_sanitize_append(str, src, max_bytes);
4d4d6d4745682790c20d759ba93dbea46b812c5dTimo Sirainen return str_c(str);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen