/* escape.c revision c932fb71cc90461b88ecdffe47c071d001d78fb4 */
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "alloc-util.h"
#include "escape.h"
#include "hexdecoct.h"
#include "macro.h"
#include "utf8.h"

/* Writes the C style escaped form of the single byte c to buf and returns the
 * number of bytes written: 2 for characters that have a dedicated escape
 * letter, 4 for the octal fallback, 1 when the byte is copied unchanged.
 *
 * The output is not NUL-terminated and no capacity is passed in, so buf must
 * have room for at least 4 bytes. */
size_t cescape_char(char c, char *buf) {
        char *start = buf;
        char letter = 0; /* second byte of a two-byte escape, 0 if there is none */

        switch (c) {

        case '\a':
                letter = 'a';
                break;
        case '\b':
                letter = 'b';
                break;
        case '\f':
                letter = 'f';
                break;
        case '\n':
                letter = 'n';
                break;
        case '\r':
                letter = 'r';
                break;
        case '\t':
                letter = 't';
                break;
        case '\v':
                letter = 'v';
                break;
        case '\\':
                letter = '\\';
                break;
        case '"':
                letter = '"';
                break;
        case '\'':
                letter = '\'';
                break;

        default:
                break;
        }

        if (letter) {
                *(buf++) = '\\';
                *(buf++) = letter;
        } else if ((c < ' ') || (c >= 127)) {
                /* For special chars we prefer octal over hexadecimal
                 * encoding, simply because glib's g_strescape() does the
                 * same. Where char is signed, bytes >= 0x80 are negative and
                 * hence caught by the first comparison.
                 *
                 * NOTE(review): the shifted values are passed unmasked, so
                 * octchar() presumably only looks at the low three bits --
                 * confirm against hexdecoct.h. */
                *(buf++) = '\\';
                *(buf++) = octchar((unsigned char) c >> 6);
                *(buf++) = octchar((unsigned char) c >> 3);
                *(buf++) = octchar((unsigned char) c);
        } else
                *(buf++) = c;

        return buf - start;
}

f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersenchar *cescape_length(const char *s, size_t n) {
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen const char *f;
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen char *r, *t;
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen assert(s || n == 0);
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen /* Does C style string escaping. May be reversed with
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen * cunescape(). */
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen r = new(char, n*4 + 1);
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen if (!r)
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen return NULL;
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen for (f = s, t = r; f < s + n; f++)
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen t += cescape_char(*f, t);
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen *t = 0;
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen return r;
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen}
f579559b3a14c1f1ef96c372e7626c4733e6ef7dTom Gundersen
/* C style escaping of the whole NUL-terminated string s; a convenience
 * wrapper around cescape_length(). s must not be NULL. Returns whatever
 * cescape_length() returns, i.e. a newly allocated string or NULL. */
char *cescape(const char *s) {
        assert(s);

        return cescape_length(s, strlen(s));
}

/* Unescapes one C style escape sequence. p points at the character right
 * after the backslash; length is the number of bytes available at p, or
 * (size_t) -1 if that is unknown and p is NUL-terminated.
 *
 * On success the decoded character is stored in *ret and the number of bytes
 * consumed from p is returned: 1 for the single-letter escapes, 3 for \xHH
 * and three-digit octal, 5 for \uHHHH and 9 for \UHHHHHHHH.
 *
 * *eight_bit is set to true for \xHH and octal sequences, whose value is a
 * literal byte that should be copied directly rather than encoded as UTF-8.
 * It is left untouched in all other cases, so the caller has to initialize
 * it to false beforehand.
 *
 * Returns a negative errno-style value (-EINVAL, or the error reported by
 * unhexchar()) if the sequence is unknown, truncated or malformed, or if it
 * would decode to a NUL character. */
int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
        int r = 1;

        assert(p);
        assert(*p);
        assert(ret);

        if (length != (size_t) -1 && length < 1)
                return -EINVAL;

        switch (p[0]) {

        case 'a':
                *ret = '\a';
                break;
        case 'b':
                *ret = '\b';
                break;
        case 'f':
                *ret = '\f';
                break;
        case 'n':
                *ret = '\n';
                break;
        case 'r':
                *ret = '\r';
                break;
        case 't':
                *ret = '\t';
                break;
        case 'v':
                *ret = '\v';
                break;
        case '\\':
                *ret = '\\';
                break;
        case '"':
                *ret = '"';
                break;
        case '\'':
                *ret = '\'';
                break;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                break;

        case 'x': {
                /* hexadecimal encoding */
                int a, b;

                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                /* NOTE(review): when length is (size_t) -1 the digits are
                 * read without a bounds check; this presumably relies on
                 * unhexchar() rejecting the terminating NUL -- confirm. */
                a = unhexchar(p[1]);
                if (a < 0)
                        return -EINVAL;

                b = unhexchar(p[2]);
                if (b < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (a == 0 && b == 0)
                        return -EINVAL;

                *ret = (a << 4U) | b;
                *eight_bit = true;
                r = 3;
                break;
        }

        case 'u': {
                /* C++11 style 16bit unicode */
                int a[4];
                unsigned i;
                uint32_t c;

                if (length != (size_t) -1 && length < 5)
                        return -EINVAL;

                for (i = 0; i < 4; i++) {
                        a[i] = unhexchar(p[1 + i]);
                        if (a[i] < 0)
                                return a[i];
                }

                c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                *ret = c;
                r = 5;
                break;
        }

        case 'U': {
                /* C++11 style 32bit unicode */
                int a[8];
                unsigned i;
                char32_t c;

                if (length != (size_t) -1 && length < 9)
                        return -EINVAL;

                for (i = 0; i < 8; i++) {
                        a[i] = unhexchar(p[1 + i]);
                        if (a[i] < 0)
                                return a[i];
                }

                c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
                    ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) | ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                /* Don't allow invalid code points */
                if (!unichar_is_valid(c))
                        return -EINVAL;

                *ret = c;
                r = 9;
                break;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* octal encoding, always exactly three digits */
                int a, b, c;
                char32_t m;

                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                a = unoctchar(p[0]);
                if (a < 0)
                        return -EINVAL;

                b = unoctchar(p[1]);
                if (b < 0)
                        return -EINVAL;

                c = unoctchar(p[2]);
                if (c < 0)
                        return -EINVAL;

                /* don't allow NUL bytes */
                if (a == 0 && b == 0 && c == 0)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
                if (m > 255)
                        return -EINVAL;

                *ret = m;
                *eight_bit = true;
                r = 3;
                break;
        }

        default:
                return -EINVAL;
        }

        return r;
}
/* Undoes C style string escaping on the first length bytes of s, optionally
 * putting prefix (copied verbatim, may be NULL) in front of the result.
 *
 * On success *ret receives a newly allocated, NUL-terminated string and the
 * number of bytes in it (prefix included, terminator excluded) is returned.
 * On failure nothing is stored in *ret and a negative errno-style value is
 * returned: -ENOMEM if allocation fails, -EINVAL for a trailing backslash,
 * or the error from cunescape_one() for a bad sequence.
 *
 * With UNESCAPE_RELAX set in flags, a trailing backslash and the backslash of
 * an invalid sequence are copied literally instead of failing. */
int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
        const char *in, *end;
        char *buf, *out;
        size_t prefix_len;

        assert(s);
        assert(ret);

        prefix_len = prefix ? strlen(prefix) : 0;

        buf = new(char, prefix_len+length+1);
        if (!buf)
                return -ENOMEM;

        if (prefix)
                memcpy(buf, prefix, prefix_len);

        in = s;
        end = s + length;
        out = buf + prefix_len;

        while (in < end) {
                size_t left = end - in;
                bool eight_bit = false;
                char32_t u;
                int k;

                assert(left > 0);

                if (*in != '\\') {
                        /* An ordinary character, copy verbatim */
                        *(out++) = *(in++);
                        continue;
                }

                if (left == 1) {
                        /* A backslash with nothing after it */
                        if (!(flags & UNESCAPE_RELAX)) {
                                free(buf);
                                return -EINVAL;
                        }

                        *(out++) = *(in++);
                        continue;
                }

                k = cunescape_one(in + 1, left - 1, &u, &eight_bit);
                if (k < 0) {
                        if (!(flags & UNESCAPE_RELAX)) {
                                free(buf);
                                return k;
                        }

                        /* Invalid escape code: keep the backslash and let
                         * the following characters be handled as literals */
                        *(out++) = '\\';
                        in++;
                        continue;
                }

                /* Step over the backslash plus the k bytes just decoded */
                in += k + 1;

                if (eight_bit)
                        /* One byte? Set directly as specified */
                        *(out++) = u;
                else
                        /* Otherwise encode as multi-byte UTF-8 */
                        out += utf8_encode_unichar(out, u);
        }

        *out = 0;
        *ret = buf;

        return out - buf;
}
/* Like cunescape_length_with_prefix(), but without a prefix. */
int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
        const char *no_prefix = NULL;

        return cunescape_length_with_prefix(s, length, no_prefix, flags, ret);
}
/* Like cunescape_length(), but processes the whole NUL-terminated string s. */
int cunescape(const char *s, UnescapeFlags flags, char **ret) {
        size_t length = strlen(s);

        return cunescape_length(s, length, flags, ret);
}
/* Escapes all chars in bad, in addition to \ and all special chars (anything
 * below ' ' or from 127 upwards), in \xFF style escaping. May be reversed
 * with cunescape().
 *
 * Returns a newly allocated, NUL-terminated string, or NULL if the buffer
 * cannot be allocated or if s is so long that the worst-case output size
 * would not fit in a size_t. */
char *xescape(const char *s, const char *bad) {
        char *r, *t;
        const char *f;
        size_t n;

        /* Every input byte expands to at most four output bytes; refuse
         * lengths for which n*4 + 1 would wrap around and yield a too-small
         * buffer. */
        n = strlen(s);
        if (n > (SIZE_MAX - 1) / 4)
                return NULL;

        r = new(char, n * 4 + 1);
        if (!r)
                return NULL;

        for (f = s, t = r; *f; f++) {

                if ((*f < ' ') || (*f >= 127) ||
                    (*f == '\\') || strchr(bad, *f)) {
                        /* NOTE(review): the values are passed unmasked, so
                         * hexchar() presumably only looks at the low four
                         * bits -- confirm against hexdecoct.h. */
                        *(t++) = '\\';
                        *(t++) = 'x';
                        *(t++) = hexchar(*f >> 4);
                        *(t++) = hexchar(*f);
                } else
                        *(t++) = *f;
        }

        *t = 0;

        return r;
}
/* Copies the NUL-terminated string s to t, putting a backslash in front of
 * every backslash and every character that occurs in bad. Returns a pointer
 * just past the last byte written. The output is not NUL-terminated and t
 * must have room for up to two bytes per input byte. */
static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        assert(bad);

        while (*s != '\0') {
                char c = *(s++);

                if (c == '\\' || strchr(bad, c) != NULL)
                        *(t++) = '\\';

                *(t++) = c;
        }

        return t;
}
/* Returns a newly allocated, NUL-terminated copy of s in which every
 * backslash and every character listed in bad is preceded by a backslash
 * (see strcpy_backslash_escaped()). Returns NULL if allocation fails. */
char *shell_escape(const char *s, const char *bad) {
        char *buf, *end;

        /* Worst case every byte gets a backslash, plus the terminator */
        buf = new(char, strlen(s)*2+1);
        if (!buf)
                return NULL;

        end = strcpy_backslash_escaped(buf, s, bad);
        *end = 0;

        return buf;
}
/* Encloses a string in double quotes if necessary to make it OK as shell
 * string.
 *
 * If s contains no character that is <= ' ', >= 127 or listed in
 * SHELL_NEED_QUOTES, a plain strdup() copy is returned. Otherwise the result
 * is wrapped in double quotes: everything before the first such character is
 * copied verbatim, and from there on the characters in SHELL_NEED_ESCAPE
 * (and backslashes) are backslash-escaped.
 *
 * Returns a newly allocated string, or NULL if allocation fails. */
char *shell_maybe_quote(const char *s) {
        const char *p;
        char *r, *t;

        assert(s);

        /* Skip ahead to the first character that makes quoting necessary */
        p = s;
        while (*p &&
               *p > ' ' &&
               *p < 127 &&
               !strchr(SHELL_NEED_QUOTES, *p))
                p++;

        /* Reached the end without finding one? Then no quoting is needed */
        if (!*p)
                return strdup(s);

        /* Opening quote, up to two bytes per input byte, closing quote, NUL */
        r = new(char, 1+strlen(s)*2+1+1);
        if (!r)
                return NULL;

        r[0] = '"';
        t = mempcpy(r + 1, s, p - s);

        t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);

        *(t++) = '"';
        *t = 0;

        return r;
}