bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
211c638d81d382517d196ad47565e0d85012c927klemens/* Implemented against draft-ietf-imapext-sort-10 and
fd4632d0060b2e9eef513b544ccff1e26d1fc222Timo Sirainen draft-ietf-imapext-thread-12 */
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* check if we need to do anything */
d64e7a4e2aa694a4197c392e4531a9df13c95b36Timo Sirainen if (*data == '\t' || *data == '\n' || *data == '\r' ||
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen (*data == ' ' && (data[1] == ' ' || data[1] == '\t')))
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* @UNSAFE: convert/pack the whitespace */
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen buffer_set_used_size(buf, (size_t) (dest - data)+1);
ff7f956ac78bd665a536daf25b33266f2e33b7c0Timo Sirainenstatic void remove_subj_trailers(buffer_t *buf, size_t start_pos,
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* subj-trailer = "(fwd)" / WSP */
f72ce8a49e82ced95ea3b437b0e17f27982e617eTimo Sirainen if (orig_size < 1) /* size includes trailing \0 */
f72ce8a49e82ced95ea3b437b0e17f27982e617eTimo Sirainen for (size = orig_size-1; size > start_pos; ) {
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen while (*data != '\0' && *data != '[' && *data != ']')
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainenstatic bool remove_subj_leader(buffer_t *buf, size_t *start_pos,
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* subj-leader = (*subj-blob subj-refwd) / WSP
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen subj-blob = "[" *BLOBCHAR "]" *WSP
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen subj-refwd = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":"
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen BLOBCHAR = %x01-5a / %x5c / %x5e-7f
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen ; any CHAR except '[' and ']' */
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* independent from checks below - always removed */
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainenstatic bool remove_blob_when_nonempty(buffer_t *buf, size_t *start_pos)
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen if (*data == '[' && remove_blob(&data) && *data != '\0') {
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainenstatic bool remove_subj_fwd_hdr(buffer_t *buf, size_t *start_pos,
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* subj-fwd = subj-fwd-hdr subject subj-fwd-trl
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen subj-fwd-hdr = "[fwd:"
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen subj-fwd-trl = "]" */
f72ce8a49e82ced95ea3b437b0e17f27982e617eTimo Sirainen if (strncmp(data + *start_pos, "[FWD:", 5) != 0)
fd4632d0060b2e9eef513b544ccff1e26d1fc222Timo Sirainenconst char *imap_get_base_subject_cased(pool_t pool, const char *subject,
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen buf = buffer_create_dynamic(pool, subject_len);
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* (1) Convert any RFC 2047 encoded-words in the subject to
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen UTF-8. Convert all tabs and continuations to space.
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen Convert all multiple spaces to a single space. */
bd05299ba9a77310d76cf8aca447d748aeeaa75aTimo Sirainen message_header_decode_utf8((const unsigned char *)subject, subject_len,
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* (2) Remove all trailing text of the subject that matches
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen the subj-trailer ABNF, repeat until no more matches are
ff7f956ac78bd665a536daf25b33266f2e33b7c0Timo Sirainen remove_subj_trailers(buf, start_pos, is_reply_or_forward_r);
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* (3) Remove all prefix text of the subject that
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen matches the subj-leader ABNF. */
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* (4) If there is prefix text of the subject that
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen matches the subj-blob ABNF, and removing that prefix
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen leaves a non-empty subj-base, then remove the prefix
ff7f956ac78bd665a536daf25b33266f2e33b7c0Timo Sirainen found = remove_blob_when_nonempty(buf, &start_pos) ||
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* (5) Repeat (3) and (4) until no matches remain. */
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* (6) If the resulting text begins with the subj-fwd-hdr ABNF
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen and ends with the subj-fwd-trl ABNF, remove the
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen subj-fwd-hdr and subj-fwd-trl and repeat from step (2). */
ff7f956ac78bd665a536daf25b33266f2e33b7c0Timo Sirainen } while (remove_subj_fwd_hdr(buf, &start_pos, is_reply_or_forward_r));
ffed94939ce223a7c801aec3d90b45198dd4462dTimo Sirainen /* (7) The resulting text is the "base subject" used in the