extract-word.c revision 27fc921b658adc5baa988c4c213888b016a60b18
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

613b411c947635136637f8cdd66b94512f761eabLennart Poettering#include "alloc-util.h"
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek#include "escape.h"
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek#include "extract-word.h"
0b452006de98294d1690f045f6ea2f7f6630ec3bRonny Chevalier#include "string-util.h"
07630cea1f3a845c09309f197ac7c4f11edd3b62Lennart Poettering#include "utf8.h"
07630cea1f3a845c09309f197ac7c4f11edd3b62Lennart Poettering#include "util.h"
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmekint extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek _cleanup_free_ char *s = NULL;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering size_t allocated = 0, sz = 0;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering char c;
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek int r;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
613b411c947635136637f8cdd66b94512f761eabLennart Poettering char quote = 0; /* 0 or ' or " */
613b411c947635136637f8cdd66b94512f761eabLennart Poettering bool backslash = false; /* whether we've just seen a backslash */
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek bool separator = false; /* whether we've just seen a separator */
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
613b411c947635136637f8cdd66b94512f761eabLennart Poettering assert(p);
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek assert(ret);
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
613b411c947635136637f8cdd66b94512f761eabLennart Poettering /* Bail early if called after last value or with no input */
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek if (!*p)
613b411c947635136637f8cdd66b94512f761eabLennart Poettering goto finish_force_terminate;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering c = **p;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
63c372cb9df3bee01e3bf8cd7f96f336bddda846Lennart Poettering if (!separators)
63c372cb9df3bee01e3bf8cd7f96f336bddda846Lennart Poettering separators = WHITESPACE;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
613b411c947635136637f8cdd66b94512f761eabLennart Poettering /* Parses the first word of a string, and returns it in
613b411c947635136637f8cdd66b94512f761eabLennart Poettering * *ret. Removes all quotes in the process. When parsing fails
613b411c947635136637f8cdd66b94512f761eabLennart Poettering * (because of an uneven number of quotes or similar), leaves
613b411c947635136637f8cdd66b94512f761eabLennart Poettering * the pointer *p at the first invalid character. */
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (!GREEDY_REALLOC(s, allocated, sz+1))
613b411c947635136637f8cdd66b94512f761eabLennart Poettering return -ENOMEM;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
613b411c947635136637f8cdd66b94512f761eabLennart Poettering for (;; (*p) ++, c = **p) {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (c == 0)
613b411c947635136637f8cdd66b94512f761eabLennart Poettering goto finish_force_terminate;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering else if (strchr(separators, c)) {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering (*p) ++;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering goto finish_force_next;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering }
3d94f76c99da13e5603831d0b278f8c8c21bcb02Lennart Poettering } else {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering /* We found a non-blank character, so we will always
613b411c947635136637f8cdd66b94512f761eabLennart Poettering * want to return a string (even if it is empty),
613b411c947635136637f8cdd66b94512f761eabLennart Poettering * allocate it here. */
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (!GREEDY_REALLOC(s, allocated, sz+1))
613b411c947635136637f8cdd66b94512f761eabLennart Poettering return -ENOMEM;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering break;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering }
613b411c947635136637f8cdd66b94512f761eabLennart Poettering }
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
613b411c947635136637f8cdd66b94512f761eabLennart Poettering for (;; (*p) ++, c = **p) {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (backslash) {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (!GREEDY_REALLOC(s, allocated, sz+7))
613b411c947635136637f8cdd66b94512f761eabLennart Poettering return -ENOMEM;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (c == 0) {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
613b411c947635136637f8cdd66b94512f761eabLennart Poettering (!quote || flags & EXTRACT_RELAX)) {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering /* If we find an unquoted trailing backslash and we're in
613b411c947635136637f8cdd66b94512f761eabLennart Poettering * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
613b411c947635136637f8cdd66b94512f761eabLennart Poettering * output.
613b411c947635136637f8cdd66b94512f761eabLennart Poettering *
613b411c947635136637f8cdd66b94512f761eabLennart Poettering * Unbalanced quotes will only be allowed in EXTRACT_RELAX
613b411c947635136637f8cdd66b94512f761eabLennart Poettering * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
613b411c947635136637f8cdd66b94512f761eabLennart Poettering */
613b411c947635136637f8cdd66b94512f761eabLennart Poettering s[sz++] = '\\';
613b411c947635136637f8cdd66b94512f761eabLennart Poettering goto finish_force_terminate;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering }
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (flags & EXTRACT_RELAX)
613b411c947635136637f8cdd66b94512f761eabLennart Poettering goto finish_force_terminate;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering return -EINVAL;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering }
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (flags & EXTRACT_CUNESCAPE) {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering uint32_t u;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
613b411c947635136637f8cdd66b94512f761eabLennart Poettering r = cunescape_one(*p, (size_t) -1, &c, &u);
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (r < 0) {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (flags & EXTRACT_CUNESCAPE_RELAX) {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering s[sz++] = '\\';
613b411c947635136637f8cdd66b94512f761eabLennart Poettering s[sz++] = c;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering } else
613b411c947635136637f8cdd66b94512f761eabLennart Poettering return -EINVAL;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering } else {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering (*p) += r - 1;
613b411c947635136637f8cdd66b94512f761eabLennart Poettering
613b411c947635136637f8cdd66b94512f761eabLennart Poettering if (c != 0)
613b411c947635136637f8cdd66b94512f761eabLennart Poettering s[sz++] = c; /* normal explicit char */
613b411c947635136637f8cdd66b94512f761eabLennart Poettering else
613b411c947635136637f8cdd66b94512f761eabLennart Poettering sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
613b411c947635136637f8cdd66b94512f761eabLennart Poettering }
613b411c947635136637f8cdd66b94512f761eabLennart Poettering } else
613b411c947635136637f8cdd66b94512f761eabLennart Poettering s[sz++] = c;
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek backslash = false;
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering } else if (quote) { /* inside either single or double quotes */
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering for (;; (*p) ++, c = **p) {
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering if (c == 0) {
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek if (flags & EXTRACT_RELAX)
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering goto finish_force_terminate;
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering return -EINVAL;
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering } else if (c == quote) { /* found the end quote */
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering quote = 0;
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering break;
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering } else if (c == '\\') {
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering backslash = true;
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering break;
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering } else {
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering if (!GREEDY_REALLOC(s, allocated, sz+2))
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering return -ENOMEM;
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering s[sz++] = c;
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering }
6b46ea73e3b1d8a1e65f58ac04772821bd4a72fbLennart Poettering }
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek
613b411c947635136637f8cdd66b94512f761eabLennart Poettering } else if (separator) {
613b411c947635136637f8cdd66b94512f761eabLennart Poettering for (;; (*p) ++, c = **p) {
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek if (c == 0)
d8c9d3a468e61ee2a2b2c3454e662398b0885411Zbigniew Jędrzejewski-Szmek goto finish_force_terminate;
if (!strchr(separators, c))
goto finish;
}
} else {
for (;; (*p) ++, c = **p) {
if (c == 0)
goto finish_force_terminate;
else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
quote = c;
break;
} else if (c == '\\') {
backslash = true;
break;
} else if (strchr(separators, c)) {
if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
(*p) ++;
goto finish_force_next;
}
separator = true;
break;
} else {
if (!GREEDY_REALLOC(s, allocated, sz+2))
return -ENOMEM;
s[sz++] = c;
}
}
}
}
finish_force_terminate:
*p = NULL;
finish:
if (!s) {
*p = NULL;
*ret = NULL;
return 0;
}
finish_force_next:
s[sz] = 0;
*ret = s;
s = NULL;
return 1;
}
/* Wrapper around extract_first_word() that logs parse problems via log_syntax().
 *
 * The word is first extracted with the flags as given. If that fails with -EINVAL and
 * EXTRACT_CUNESCAPE_RELAX was not requested, the extraction is repeated from the same start
 * position with EXTRACT_CUNESCAPE_RELAX added, so that invalid escape sequences are kept
 * verbatim; a warning is logged when this second attempt succeeds.
 *
 * unit, filename, line and rvalue are only handed on to log_syntax().
 *
 * Returns the non-negative result of extract_first_word() on success, otherwise the value
 * returned by log_syntax() for the error that was reported. */
int extract_first_word_and_warn(
                const char **p,
                char **ret,
                const char *separators,
                ExtractFlags flags,
                const char *unit,
                const char *filename,
                unsigned line,
                const char *rvalue) {

        const char *start;
        bool may_retry;
        int k;

        /* Remember where we began, so that a second attempt can start over */
        start = *p;

        k = extract_first_word(p, ret, separators, flags);
        if (k >= 0)
                return k;

        /* A relaxed second pass only makes sense for parse errors, and only if the first pass
         * wasn't relaxed already. */
        may_retry = (k == -EINVAL) && !(flags & EXTRACT_CUNESCAPE_RELAX);

        if (may_retry) {
                *p = start;
                k = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);

                /* Still -EINVAL even though escapes are relaxed: must be unbalanced quoting */
                if (k == -EINVAL)
                        return log_syntax(unit, LOG_ERR, filename, line, k, "Unbalanced quoting, ignoring: \"%s\"", rvalue);

                if (k >= 0) {
                        /* The relaxed pass succeeded, so the culprit was an escape sequence we could keep as-is */
                        log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
                        return k;
                }
        }

        /* Anything else is reported as a generic decoding failure */
        return log_syntax(unit, LOG_ERR, filename, line, k, "Unable to decode word \"%s\", ignoring: %m", rvalue);
}

/* Extracts several words from *p in one go, stripping quotes where the flags ask for it.
 *
 * The variable arguments are a NULL-terminated list of char** output locations. One word is
 * extracted per location with extract_first_word(), using the given separators and flags.
 *
 * On success every output location is written: the first ones receive the extracted words (owned
 * by the caller), those for which no word was left receive NULL. The return value is the number of
 * words extracted. If an extraction fails, the words collected so far are freed, no output
 * location is written and the negative error is returned. With an empty output list 0 is
 * returned. */
int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
        va_list ap;
        char **words;
        int expected = 0, parsed, k;

        assert(p);

        /* First pass over the arguments: count the output slots up to the NULL sentinel */
        va_start(ap, flags);
        while (va_arg(ap, char **))
                expected++;
        va_end(ap);

        if (expected <= 0)
                return 0;

        /* Collect the words in a temporary array first, so that the caller's pointers stay
         * untouched if we fail half-way. newa0() presumably yields a zero-initialized stack
         * array, which leaves the unused slots NULL. */
        words = newa0(char*, expected);

        for (parsed = 0; parsed < expected; parsed++) {
                int q;

                q = extract_first_word(p, &words[parsed], separators, flags);
                if (q == 0)
                        break;          /* input exhausted before all slots were filled */

                if (q < 0) {
                        /* Drop what we collected so far and propagate the error */
                        for (k = 0; k < parsed; k++)
                                free(words[k]);

                        return q;
                }
        }

        /* Second pass over the arguments: hand the results (or NULL) out to the caller */
        va_start(ap, flags);
        for (k = 0; k < expected; k++) {
                char **out;

                out = va_arg(ap, char **);
                assert(out);

                *out = words[k];
        }
        va_end(ap);

        return parsed;
}