extract-word.c revision 4f5dd3943bef8a04be7e3b838b822bb9a7ad6cb3
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen This file is part of systemd.
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen Copyright 2010 Lennart Poettering
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen systemd is free software; you can redistribute it and/or modify it
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen under the terms of the GNU Lesser General Public License as published by
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen the Free Software Foundation; either version 2.1 of the License, or
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen (at your option) any later version.
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen systemd is distributed in the hope that it will be useful, but
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen WITHOUT ANY WARRANTY; without even the implied warranty of
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen Lesser General Public License for more details.
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen You should have received a copy of the GNU Lesser General Public License
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen along with systemd; If not, see <http://www.gnu.org/licenses/>.
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersenint extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
07630cea1f3a845c09309f197ac7c4f11edd3b62Lennart Poettering bool backslash = false; /* whether we've just seen a backslash */
07630cea1f3a845c09309f197ac7c4f11edd3b62Lennart Poettering bool separator = false; /* whether we've just seen a separator */
6bedfcbb2970e06a4d3280c8fb62083d252ede73Lennart Poettering bool start = true; /* false means we're looking at a value */
07630cea1f3a845c09309f197ac7c4f11edd3b62Lennart Poettering /* Bail early if called after last value or with no input */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* Parses the first word of a string, and returns it in
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * *ret. Removes all quotes in the process. When parsing fails
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * (because of an uneven number of quotes or similar), leaves
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * the pointer *p at the first invalid character. */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen char c = **p;
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* We found a non-blank character, so we will always
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * want to return a string (even if it is empty);
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * allocate it here. */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen if (c == 0) {
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* If we find an unquoted trailing backslash and we're in
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the output.
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * Unbalanced quotes will only be allowed in EXTRACT_RELAX
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * mode; EXTRACT_CUNESCAPE_RELAX mode does not allow them. */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen (*p) += r - 1;
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen } else if (quote) { /* inside either single or double quotes */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen if (c == 0) {
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen } else if (c == quote) /* found the end quote */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen else if (c == '\\')
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES))
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen else if (c == '\\')
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen const char **p,
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen const char *rvalue) {
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* Try to unquote it; if that fails, warn about it and try again,
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * this time using EXTRACT_CUNESCAPE_RELAX to keep the
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * backslashes verbatim in invalid escape sequences. */
e53fc357a9bb9d0a5362ccc4246d598cb0febd5eLennart Poettering r = extract_first_word(p, ret, separators, flags);
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen if (r >= 0) {
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* If it's still EINVAL, then it must be unbalanced quoting; report this. */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* Can be any error, report it */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersenint extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen int n = 0, i, c, r;
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* Parses a number of words from a string, stripping any
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * quotes if necessary. */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* Count how many words are expected */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* Read all words into a temporary array */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen l = newa0(char*, n);
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen for (c = 0; c < n; c++) {
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen r = extract_first_word(p, &l[c], separators, flags);
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen for (j = 0; j < c; j++)
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen /* If we managed to parse all words, return them in the passed
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen * in parameters */
57fa1d094cd2c5ac68970526ad0a0754c548e75dTom Gundersen for (i = 0; i < n; i++) {