term-parser.c revision 1c9633d669948155455e29b0c6e770995a8b1ca3
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
/*
* Terminal Parser
* This file contains a bunch of UTF-8 helpers and the main ctlseq-parser. The
* parser is a simple state-machine that correctly parses all CSI, DCS, OSC, ST
* control sequences and generic escape sequences.
* The parser itself does not perform any actions but lets the caller react to
* detected sequences.
*/
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include "macro.h"
#include "term-internal.h"
#include "util.h"
/**
* term_utf8_encode() - Encode single UCS-4 character as UTF-8
* @out_utf8: output buffer of at least 4 bytes or NULL
* @g: UCS-4 character to encode
*
* This encodes a single UCS-4 character as UTF-8 and writes it into @out_utf8.
* The length of the character is returned. It is not zero-terminated! If the
* output buffer is NULL, only the length is returned.
*
* Returns: The length in bytes that the UTF-8 representation does or would
* occupy.
*/
/*
 * Standard UTF-8 layout: the leading byte carries the length marker plus
 * the top payload bits; every continuation byte is 10xxxxxx and carries six
 * payload bits. When @out_utf8 is NULL only the length is computed.
 */
if (g < (1 << 7)) {
        /* 1 byte: 0xxxxxxx */
        if (out_utf8)
                out_utf8[0] = g & 0x7f;
        return 1;
} else if (g < (1 << 11)) {
        /* 2 bytes: 110xxxxx 10xxxxxx */
        if (out_utf8) {
                out_utf8[0] = 0xc0 | ((g >> 6) & 0x1f);
                out_utf8[1] = 0x80 | (g & 0x3f);
        }
        return 2;
} else if (g < (1 << 16)) {
        /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        if (out_utf8) {
                out_utf8[0] = 0xe0 | ((g >> 12) & 0x0f);
                out_utf8[1] = 0x80 | ((g >> 6) & 0x3f);
                out_utf8[2] = 0x80 | (g & 0x3f);
        }
        return 3;
} else if (g < (1 << 21)) {
        /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        if (out_utf8) {
                out_utf8[0] = 0xf0 | ((g >> 18) & 0x07);
                out_utf8[1] = 0x80 | ((g >> 12) & 0x3f);
                out_utf8[2] = 0x80 | ((g >> 6) & 0x3f);
                out_utf8[3] = 0x80 | (g & 0x3f);
        }
        return 4;
} else {
        /* does not fit into 21 bits, so it cannot be encoded in 4 bytes */
        return 0;
}
}
/**
* term_utf8_decode() - Try decoding the next UCS-4 character
* @p: decoder object to operate on or NULL
* @out_len: output buffer for length of decoded UCS-4 string or NULL
* @c: next char to push into decoder
*
* This decodes a UTF-8 stream. It must be called for each input-byte of the
* UTF-8 stream and returns a UCS-4 stream. The length of the returned UCS-4
* string (number of parsed characters) is stored in @out_len if non-NULL. A
* pointer to the string is returned (or NULL if none was parsed). The string
* is not zero-terminated! Furthermore, the string is only valid until the next
* invocation of this function. It is also bound to the parser-state @p.
*
* This function is highly optimized to work with terminal-emulators. Instead
* of being strict about UTF-8 validity, this tries to perform a fallback to
* ISO-8859-1 in case a wrong series was detected. Therefore, this function
* might return multiple UCS-4 characters by parsing just a single UTF-8 byte.
*
* The parser state @p should be allocated and managed by the caller. There are
* no helpers to do that for you. To initialize it, simply reset it to all
* zero. You can reset or free the object at any point in time.
*
* Returns: Pointer to the UCS-4 string or NULL.
*/
if (!p)
goto out;
byte = c;
/*
* If the previous sequence was invalid or fully parsed, start
* parsing a fresh new sequence.
*/
/* start of two byte sequence */
t = byte & 0x1F;
p->n_bytes = 2;
p->i_bytes = 1;
p->valid = 1;
/* start of three byte sequence */
t = byte & 0x0F;
p->n_bytes = 3;
p->i_bytes = 1;
p->valid = 1;
/* start of four byte sequence */
t = byte & 0x07;
p->n_bytes = 4;
p->i_bytes = 1;
p->valid = 1;
} else {
/* Either of:
* - single ASCII 7-bit char
* - out-of-sync continuation byte
* - overlong encoding
* All of them are treated as single byte ISO-8859-1 */
t = byte;
p->n_bytes = 1;
p->i_bytes = 1;
p->valid = 0;
}
} else {
/*
* ..otherwise, try to continue the previous sequence..
*/
/*
* Valid continuation byte. Append to sequence and
* update the ucs4 cache accordingly.
*/
t = byte & 0x3F;
} else {
/*
* Invalid continuation? Treat cached sequence as
* ISO-8859-1, but parse the new char as valid new
* starting character. If it's a new single-byte UTF-8
* sequence, we immediately return it in the same run,
* otherwise, we might suffer from starvation.
*/
/*
* New multi-byte sequence. Move to-be-returned
* data at the end and start new sequence. Only
* return the old sequence.
*/
p->chars,
/* start of two byte sequence */
t = byte & 0x1F;
p->n_bytes = 2;
p->i_bytes = 1;
p->valid = 1;
/* start of three byte sequence */
t = byte & 0x0F;
p->n_bytes = 3;
p->i_bytes = 1;
p->valid = 1;
/* start of four byte sequence */
t = byte & 0x07;
p->n_bytes = 4;
p->i_bytes = 1;
p->valid = 1;
}
goto out;
} else {
/*
* New single byte sequence, append to output
* and return combined sequence.
*/
p->valid = 0;
}
}
}
/*
* Check whether a full sequence (valid or invalid) has been parsed and
* then return it. Otherwise, return nothing.
*/
if (p->valid) {
/* still parsing? then bail out */
goto out;
len = 1;
} else {
}
p->valid = 0;
p->i_bytes = 0;
p->n_bytes = 0;
out:
if (out_len)
}
/*
* Command Parser
* The ctl-seq parser "term_parser" only detects whole sequences, it does not
* detect the specific command. Once a sequence is parsed, the command-parsers
* are used to figure out their meaning. Note that this depends on whether we
* run on the host or terminal side.
*/
/*
 * Map a single C0/C1 control character, read from seq->terminator, to its
 * TERM_CMD_* identifier. Codes that only "break" (marked below as handled by
 * the state-machine) and codes that are not listed at all end up at the
 * final return and yield TERM_CMD_NONE.
 */
switch (seq->terminator) {
case 0x00: /* NUL */
return TERM_CMD_NULL;
case 0x05: /* ENQ */
return TERM_CMD_ENQ;
case 0x07: /* BEL */
return TERM_CMD_BEL;
case 0x08: /* BS */
return TERM_CMD_BS;
case 0x09: /* HT */
return TERM_CMD_HT;
case 0x0a: /* LF */
return TERM_CMD_LF;
case 0x0b: /* VT */
return TERM_CMD_VT;
case 0x0c: /* FF */
return TERM_CMD_FF;
case 0x0d: /* CR */
return TERM_CMD_CR;
case 0x0e: /* SO */
return TERM_CMD_SO;
case 0x0f: /* SI */
return TERM_CMD_SI;
case 0x11: /* DC1 */
return TERM_CMD_DC1;
case 0x13: /* DC3 */
return TERM_CMD_DC3;
case 0x18: /* CAN */
/* this is already handled by the state-machine */
break;
case 0x1a: /* SUB */
return TERM_CMD_SUB;
case 0x1b: /* ESC */
/* this is already handled by the state-machine */
break;
/* NOTE(review): 0x1f is US (unit separator) in ASCII; DEL is 0x7f. Both
 * values end up as TERM_CMD_NONE here, so only the label is affected --
 * confirm which code point was intended. */
case 0x1f: /* DEL */
/* this is already handled by the state-machine */
break;
/* C1 controls (0x80 - 0x9f) */
case 0x84: /* IND */
return TERM_CMD_IND;
case 0x85: /* NEL */
return TERM_CMD_NEL;
case 0x88: /* HTS */
return TERM_CMD_HTS;
case 0x8d: /* RI */
return TERM_CMD_RI;
case 0x8e: /* SS2 */
return TERM_CMD_SS2;
case 0x8f: /* SS3 */
return TERM_CMD_SS3;
case 0x90: /* DCS */
/* this is already handled by the state-machine */
break;
case 0x96: /* SPA */
return TERM_CMD_SPA;
case 0x97: /* EPA */
return TERM_CMD_EPA;
case 0x98: /* SOS */
/* this is already handled by the state-machine */
break;
case 0x9a: /* DECID */
return TERM_CMD_DECID;
case 0x9b: /* CSI */
/* this is already handled by the state-machine */
break;
case 0x9c: /* ST */
return TERM_CMD_ST;
case 0x9d: /* OSC */
/* this is already handled by the state-machine */
break;
case 0x9e: /* PM */
/* this is already handled by the state-machine */
break;
case 0x9f: /* APC */
/* this is already handled by the state-machine */
break;
}
/* no dedicated command for this control character */
return TERM_CMD_NONE;
}
/*
 * Lookup table for charset designators, indexed by charset id. Alternative
 * designators for the same charset are stored at
 * (id + TERM_CHARSET_CNT) and (id + 2 * TERM_CHARSET_CNT).
 *
 * Each entry is identified by its final character (@raw) together with a
 * set of flags (@flags; presumably the TERM_SEQ_FLAG_* intermediates of the
 * sequence -- confirm against the lookup code).
 */
static const struct {
        uint32_t raw;           /* final character of the designator */
        unsigned int flags;     /* flags that must accompany @raw */
} charset_cmds[] = {
        /* 96-compat charsets */
        /* 94-compat charsets */
        /* special charsets */
        /* secondary choices */
        [TERM_CHARSET_CNT + TERM_CHARSET_SWEDISH_NRCS] = { .raw = 'H', .flags = 0 }, /* unused; conflicts with ISO_HEBREW */
        /* tertiary choices */
        [TERM_CHARSET_CNT + TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '6', .flags = 0 },
};
/*
* Secondary choice on SWEDISH_NRCS and primary choice on
* ISO_HEBREW_SUPPLEMENTAL have a conflict: raw=="H", flags==0.
* We always choose the ISO 96-compat set, which is what VT510 does.
*/
for (i = 0; i < ELEMENTSOF(charset_cmds); ++i) {
cs = i;
while (cs >= TERM_CHARSET_CNT)
cs -= TERM_CHARSET_CNT;
return cs;
}
}
return -ENOENT;
}
/**
 * exactly_one_bit_set() - Test whether exactly one bit of a value is set
 * @value: bit-mask to inspect
 *
 * Returns: true if exactly one bit in @value is set, false if no bit or more
 *          than one bit is set.
 */
static inline bool exactly_one_bit_set(unsigned int value) {
        /*
         * (value - 1) flips the lowest set bit and everything below it, so
         * the AND is zero only if that lowest bit was the sole bit set. Zero
         * itself has to be excluded explicitly.
         */
        return value != 0 && (value & (value - 1)) == 0;
}
unsigned int t, flags;
int cs;
if (exactly_one_bit_set(flags & t)) {
switch (flags & t) {
case TERM_SEQ_FLAG_POPEN:
case TERM_SEQ_FLAG_PCLOSE:
case TERM_SEQ_FLAG_MULT:
case TERM_SEQ_FLAG_PLUS:
break;
case TERM_SEQ_FLAG_MINUS:
case TERM_SEQ_FLAG_DOT:
case TERM_SEQ_FLAG_SLASH:
break;
default:
break;
}
if (cs >= 0) {
if (cs_out)
return TERM_CMD_SCS;
}
/* looked like a charset-cmd but wasn't; continue */
}
switch (seq->terminator) {
case '3':
return TERM_CMD_DECDHL_TH;
break;
case '4':
return TERM_CMD_DECDHL_BH;
break;
case '5':
return TERM_CMD_DECSWL;
break;
case '6':
if (flags == 0) /* DECBI */
return TERM_CMD_DECBI;
return TERM_CMD_DECDWL;
break;
case '7':
if (flags == 0) /* DECSC */
return TERM_CMD_DECSC;
break;
case '8':
if (flags == 0) /* DECRC */
return TERM_CMD_DECRC;
return TERM_CMD_DECALN;
break;
case '9':
if (flags == 0) /* DECFI */
return TERM_CMD_DECFI;
break;
case '<':
if (flags == 0) /* DECANM */
return TERM_CMD_DECANM;
break;
case '=':
if (flags == 0) /* DECKPAM */
return TERM_CMD_DECKPAM;
break;
case '>':
if (flags == 0) /* DECKPNM */
return TERM_CMD_DECKPNM;
break;
case '@':
if (flags == TERM_SEQ_FLAG_PERCENT) {
/* Select default character set */
return TERM_CMD_XTERM_SDCS;
}
break;
case 'D':
if (flags == 0) /* IND */
return TERM_CMD_IND;
break;
case 'E':
if (flags == 0) /* NEL */
return TERM_CMD_NEL;
break;
case 'F':
if (flags == 0) /* Cursor to lower-left corner of screen */
return TERM_CMD_XTERM_CLLHP;
return TERM_CMD_S7C1T;
break;
case 'G':
return TERM_CMD_S8C1T;
} else if (flags == TERM_SEQ_FLAG_PERCENT) {
/* Select UTF-8 character set */
return TERM_CMD_XTERM_SUCS;
}
break;
case 'H':
if (flags == 0) /* HTS */
return TERM_CMD_HTS;
break;
case 'L':
if (flags == TERM_SEQ_FLAG_SPACE) {
/* Set ANSI conformance level 1 */
return TERM_CMD_XTERM_SACL1;
}
break;
case 'M':
if (flags == 0) { /* RI */
return TERM_CMD_RI;
} else if (flags == TERM_SEQ_FLAG_SPACE) {
/* Set ANSI conformance level 2 */
return TERM_CMD_XTERM_SACL2;
}
break;
case 'N':
if (flags == 0) { /* SS2 */
return TERM_CMD_SS2;
} else if (flags == TERM_SEQ_FLAG_SPACE) {
/* Set ANSI conformance level 3 */
return TERM_CMD_XTERM_SACL3;
}
break;
case 'O':
if (flags == 0) /* SS3 */
return TERM_CMD_SS3;
break;
case 'P':
if (flags == 0) /* DCS: this is already handled by the state-machine */
return 0;
break;
case 'V':
if (flags == 0) /* SPA */
return TERM_CMD_SPA;
break;
case 'W':
if (flags == 0) /* EPA */
return TERM_CMD_EPA;
break;
case 'X':
if (flags == 0) { /* SOS */
/* this is already handled by the state-machine */
break;
}
break;
case 'Z':
if (flags == 0) /* DECID */
return TERM_CMD_DECID;
break;
case '[':
if (flags == 0) { /* CSI */
/* this is already handled by the state-machine */
break;
}
break;
case '\\':
if (flags == 0) /* ST */
return TERM_CMD_ST;
break;
case ']':
if (flags == 0) { /* OSC */
/* this is already handled by the state-machine */
break;
}
break;
case '^':
if (flags == 0) { /* PM */
/* this is already handled by the state-machine */
break;
}
break;
case '_':
if (flags == 0) { /* APC */
/* this is already handled by the state-machine */
break;
}
break;
case 'c':
if (flags == 0) /* RIS */
return TERM_CMD_RIS;
break;
case 'l':
if (flags == 0) /* Memory lock */
return TERM_CMD_XTERM_MLHP;
break;
case 'm':
if (flags == 0) /* Memory unlock */
return TERM_CMD_XTERM_MUHP;
break;
case 'n':
if (flags == 0) /* LS2 */
return TERM_CMD_LS2;
break;
case 'o':
if (flags == 0) /* LS3 */
return TERM_CMD_LS3;
break;
case '|':
if (flags == 0) /* LS3R */
return TERM_CMD_LS3R;
break;
case '}':
if (flags == 0) /* LS2R */
return TERM_CMD_LS2R;
break;
case '~':
if (flags == 0) /* LS1R */
return TERM_CMD_LS1R;
break;
}
return TERM_CMD_NONE;
}
unsigned int flags;
switch (seq->terminator) {
case 'A':
if (flags == 0) /* CUU */
return TERM_CMD_CUU;
break;
case 'a':
if (flags == 0) /* HPR */
return TERM_CMD_HPR;
break;
case 'B':
if (flags == 0) /* CUD */
return TERM_CMD_CUD;
break;
case 'b':
if (flags == 0) /* REP */
return TERM_CMD_REP;
break;
case 'C':
if (flags == 0) /* CUF */
return TERM_CMD_CUF;
break;
case 'c':
if (flags == 0) /* DA1 */
return TERM_CMD_DA1;
return TERM_CMD_DA2;
return TERM_CMD_DA3;
break;
case 'D':
if (flags == 0) /* CUB */
return TERM_CMD_CUB;
break;
case 'd':
if (flags == 0) /* VPA */
return TERM_CMD_VPA;
break;
case 'E':
if (flags == 0) /* CNL */
return TERM_CMD_CNL;
break;
case 'e':
if (flags == 0) /* VPR */
return TERM_CMD_VPR;
break;
case 'F':
if (flags == 0) /* CPL */
return TERM_CMD_CPL;
break;
case 'f':
if (flags == 0) /* HVP */
return TERM_CMD_HVP;
break;
case 'G':
if (flags == 0) /* CHA */
return TERM_CMD_CHA;
break;
case 'g':
if (flags == 0) /* TBC */
return TERM_CMD_TBC;
return TERM_CMD_DECLFKC;
break;
case 'H':
if (flags == 0) /* CUP */
return TERM_CMD_CUP;
break;
case 'h':
if (flags == 0) /* SM ANSI */
return TERM_CMD_SM_ANSI;
return TERM_CMD_SM_DEC;
break;
case 'I':
if (flags == 0) /* CHT */
return TERM_CMD_CHT;
break;
case 'i':
if (flags == 0) /* MC ANSI */
return TERM_CMD_MC_ANSI;
return TERM_CMD_MC_DEC;
break;
case 'J':
if (flags == 0) /* ED */
return TERM_CMD_ED;
return TERM_CMD_DECSED;
break;
case 'K':
if (flags == 0) /* EL */
return TERM_CMD_EL;
return TERM_CMD_DECSEL;
break;
case 'L':
if (flags == 0) /* IL */
return TERM_CMD_IL;
break;
case 'l':
if (flags == 0) /* RM ANSI */
return TERM_CMD_RM_ANSI;
return TERM_CMD_RM_DEC;
break;
case 'M':
if (flags == 0) /* DL */
return TERM_CMD_DL;
break;
case 'm':
if (flags == 0) /* SGR */
return TERM_CMD_SGR;
return TERM_CMD_XTERM_SRV;
break;
case 'n':
if (flags == 0) /* DSR ANSI */
return TERM_CMD_DSR_ANSI;
return TERM_CMD_XTERM_RRV;
return TERM_CMD_DSR_DEC;
break;
case 'P':
if (flags == 0) /* DCH */
return TERM_CMD_DCH;
return TERM_CMD_PPA;
break;
case 'p':
if (flags == 0) /* DECSSL */
return TERM_CMD_DECSSL;
return TERM_CMD_DECSSCLS;
return TERM_CMD_DECSTR;
return TERM_CMD_DECSCL;
return TERM_CMD_DECRQM_ANSI;
return TERM_CMD_DECRQM_DEC;
return TERM_CMD_DECSDPT;
return TERM_CMD_DECSPPCS;
return TERM_CMD_DECSR;
return TERM_CMD_DECLTOD;
return TERM_CMD_XTERM_SPM;
break;
case 'Q':
return TERM_CMD_PPR;
break;
case 'q':
if (flags == 0) /* DECLL */
return TERM_CMD_DECLL;
return TERM_CMD_DECSCUSR;
return TERM_CMD_DECSCA;
return TERM_CMD_DECSDDT;
return TERM_CMD_DECSR;
return TERM_CMD_DECELF;
return TERM_CMD_DECTID;
break;
case 'R':
return TERM_CMD_PPB;
break;
case 'r':
if (flags == 0) {
/* DECSTBM */
return TERM_CMD_DECSTBM;
} else if (flags == TERM_SEQ_FLAG_SPACE) {
/* DECSKCV */
return TERM_CMD_DECSKCV;
} else if (flags == TERM_SEQ_FLAG_CASH) {
/* DECCARA */
return TERM_CMD_DECCARA;
} else if (flags == TERM_SEQ_FLAG_MULT) {
/* DECSCS */
return TERM_CMD_DECSCS;
} else if (flags == TERM_SEQ_FLAG_PLUS) {
/* DECSMKR */
return TERM_CMD_DECSMKR;
} else if (flags == TERM_SEQ_FLAG_WHAT) {
/*
* There's a conflict between DECPCTERM and XTERM-RPM.
* XTERM-RPM takes a single argument, DECPCTERM takes 2.
* Split both up and forward the call to the closer
* match.
*/
return TERM_CMD_XTERM_RPM;
return TERM_CMD_DECPCTERM;
}
break;
case 'S':
if (flags == 0) /* SU */
return TERM_CMD_SU;
return TERM_CMD_XTERM_SGFX;
break;
case 's':
if (flags == 0) {
/*
* There's a conflict between DECSLRM and SC-ANSI which
* cannot be resolved without knowing the state of
* DECLRMM. We leave that decision up to the caller.
*/
return TERM_CMD_DECSLRM_OR_SC;
} else if (flags == TERM_SEQ_FLAG_CASH) {
/* DECSPRTT */
return TERM_CMD_DECSPRTT;
} else if (flags == TERM_SEQ_FLAG_MULT) {
/* DECSFC */
return TERM_CMD_DECSFC;
} else if (flags == TERM_SEQ_FLAG_WHAT) {
/* XTERM SPM */
return TERM_CMD_XTERM_SPM;
}
break;
case 'T':
if (flags == 0) {
/*
* Awesome: There's a conflict between SD and XTERM IHMT
* that we have to resolve by checking the parameter
* count.. XTERM_IHMT needs exactly 5 arguments, SD
* takes 0 or 1. We're conservative here and give both
* a wider range to allow unused arguments (compat...).
*/
/* XTERM IHMT */
return TERM_CMD_XTERM_IHMT;
/* SD */
return TERM_CMD_SD;
}
} else if (flags == TERM_SEQ_FLAG_GT) {
/* XTERM RTM */
return TERM_CMD_XTERM_RTM;
}
break;
case 't':
if (flags == 0) {
/* XTERM WM */
return TERM_CMD_XTERM_WM;
} else {
/* DECSLPP */
return TERM_CMD_DECSLPP;
}
} else if (flags == TERM_SEQ_FLAG_SPACE) {
/* DECSWBV */
return TERM_CMD_DECSWBV;
} else if (flags == TERM_SEQ_FLAG_DQUOTE) {
/* DECSRFR */
return TERM_CMD_DECSRFR;
} else if (flags == TERM_SEQ_FLAG_CASH) {
/* DECRARA */
return TERM_CMD_DECRARA;
} else if (flags == TERM_SEQ_FLAG_GT) {
/* XTERM STM */
return TERM_CMD_XTERM_STM;
}
break;
case 'U':
if (flags == 0) /* NP */
return TERM_CMD_NP;
break;
case 'u':
if (flags == 0) {
/* RC */
return TERM_CMD_RC;
} else if (flags == TERM_SEQ_FLAG_SPACE) {
/* DECSMBV */
return TERM_CMD_DECSMBV;
} else if (flags == TERM_SEQ_FLAG_DQUOTE) {
/* DECSTRL */
return TERM_CMD_DECSTRL;
} else if (flags == TERM_SEQ_FLAG_WHAT) {
/* DECRQUPSS */
return TERM_CMD_DECRQUPSS;
/* DECRQTSR */
return TERM_CMD_DECRQTSR;
} else if (flags == TERM_SEQ_FLAG_MULT) {
/* DECSCP */
return TERM_CMD_DECSCP;
} else if (flags == TERM_SEQ_FLAG_COMMA) {
/* DECRQKT */
return TERM_CMD_DECRQKT;
}
break;
case 'V':
if (flags == 0) /* PP */
return TERM_CMD_PP;
break;
case 'v':
return TERM_CMD_DECSLCK;
return TERM_CMD_DECRQDE;
return TERM_CMD_DECCRA;
return TERM_CMD_DECRPKT;
break;
case 'W':
/* DECST8C */
return TERM_CMD_DECST8C;
}
break;
case 'w':
return TERM_CMD_DECRQPSR;
return TERM_CMD_DECEFR;
return TERM_CMD_DECSPP;
break;
case 'X':
if (flags == 0) /* ECH */
return TERM_CMD_ECH;
break;
case 'x':
if (flags == 0) /* DECREQTPARM */
return TERM_CMD_DECREQTPARM;
return TERM_CMD_DECFRA;
return TERM_CMD_DECSACE;
return TERM_CMD_DECRQPKFM;
break;
case 'y':
if (flags == 0) /* DECTST */
return TERM_CMD_DECTST;
return TERM_CMD_DECRQCRA;
return TERM_CMD_DECPKFMR;
break;
case 'Z':
if (flags == 0) /* CBT */
return TERM_CMD_CBT;
break;
case 'z':
return TERM_CMD_DECERA;
return TERM_CMD_DECELR;
return TERM_CMD_DECINVM;
return TERM_CMD_DECPKA;
break;
case '@':
if (flags == 0) /* ICH */
return TERM_CMD_ICH;
break;
case '`':
if (flags == 0) /* HPA */
return TERM_CMD_HPA;
break;
case '{':
return TERM_CMD_DECSERA;
return TERM_CMD_DECSLE;
break;
case '|':
return TERM_CMD_DECSCPP;
return TERM_CMD_DECRQLP;
return TERM_CMD_DECSNLS;
break;
case '}':
return TERM_CMD_DECKBD;
return TERM_CMD_DECSASD;
return TERM_CMD_DECIC;
break;
case '~':
return TERM_CMD_DECTME;
return TERM_CMD_DECSSDT;
return TERM_CMD_DECDC;
break;
}
return TERM_CMD_NONE;
}
/*
* State Machine
* This parser controls the parser-state and returns any detected sequence to
* the caller. The parser is based on the DEC-compatible state-diagram from
* Paul Williams: http://vt100.net/emu/
* It was written from scratch and extended where needed.
* This parser is fully compatible up to the vt500 series. We expect UCS-4 as
* input. It's the caller's responsibility to do any UTF-8 parsing.
*/
/*
 * States of the control-sequence state machine. Values are consecutive,
 * starting with STATE_NONE at 0.
 */
enum parser_state {
        /* placeholder */
        STATE_NONE = 0,
        /* initial state and ground */
        STATE_GROUND,

        /* ESC sequence was started */
        STATE_ESC,
        /* intermediate escape characters */
        STATE_ESC_INT,

        /* starting CSI sequence */
        STATE_CSI_ENTRY,
        /* CSI parameters */
        STATE_CSI_PARAM,
        /* intermediate CSI characters */
        STATE_CSI_INT,
        /* CSI error; ignore this CSI sequence */
        STATE_CSI_IGNORE,

        /* starting DCS sequence */
        STATE_DCS_ENTRY,
        /* DCS parameters */
        STATE_DCS_PARAM,
        /* intermediate DCS characters */
        STATE_DCS_INT,
        /* DCS data passthrough */
        STATE_DCS_PASS,
        /* DCS error; ignore this DCS sequence */
        STATE_DCS_IGNORE,

        /* parsing OSC sequence */
        STATE_OSC_STRING,
        /* unimplemented seq; ignore until ST */
        STATE_ST_IGNORE,
};
/*
 * Actions the state machine can request on a transition. Values are
 * consecutive, starting with ACTION_NONE at 0.
 */
enum parser_action {
        /* placeholder */
        ACTION_NONE = 0,
        /* clear parameters */
        ACTION_CLEAR,
        /* ignore the character entirely */
        ACTION_IGNORE,
        /* print the character on the console */
        ACTION_PRINT,
        /* execute single control character (C0/C1) */
        ACTION_EXECUTE,
        /* collect intermediate character */
        ACTION_COLLECT,
        /* collect parameter character */
        ACTION_PARAM,

        /* dispatch escape sequence */
        ACTION_ESC_DISPATCH,
        /* dispatch csi sequence */
        ACTION_CSI_DISPATCH,

        /* start of DCS data */
        ACTION_DCS_START,
        /* collect DCS data */
        ACTION_DCS_COLLECT,
        /* consume DCS terminator */
        ACTION_DCS_CONSUME,
        /* dispatch dcs sequence */
        ACTION_DCS_DISPATCH,

        /* start of OSC data */
        ACTION_OSC_START,
        /* collect OSC data */
        ACTION_OSC_COLLECT,
        /* consume OSC terminator */
        ACTION_OSC_CONSUME,
        /* dispatch osc sequence */
        ACTION_OSC_DISPATCH,
};
if (!parser)
return -ENOMEM;
return -ENOMEM;
return 0;
}
if (!parser)
return NULL;
return NULL;
}
unsigned int i;
for (i = 0; i < TERM_PARSER_ARG_MAX; ++i)
}
}
}
}
/*
* Usually, characters from 0x30 to 0x3f are only allowed as leading
* markers (or as part of the parameters), characters from 0x20 to 0x2f
* are only allowed as trailing markers. However, our state-machine
* already verifies those restrictions so we can handle them the same
* way here. Note that we safely allow markers to be specified multiple
* times.
*/
}
int new;
if (raw == ';') {
return;
}
return;
if (new < 0)
new = 0;
/* VT510 tells us to clamp all values to [0, 9999], however, it
* also allows commands with values up to 2^15-1. We simply use
* 2^16-1 (0xffff) as maximum here to be compatible with all commands,
* but avoid overflows in any calculations. */
if (new > 0xffff)
new = 0xffff;
}
}
}
/* parser->seq is cleared during CSI-ENTER state, thus there's no need
* to clear invalid fields here. */
}
}
/*
 * parser_transition() - perform state transition and dispatch related actions
 * @parser: parser object whose state is updated
 * @raw: character that triggered the transition
 * @state: state to switch to, or STATE_NONE to keep the current state
 * @action: ACTION_* value to perform for this transition
 *
 * The state change is applied first, then @action is evaluated. The state
 * update must be a statement of its own: if the "if" is left without one it
 * swallows the whole switch, the new state is never stored, and a call with
 * STATE_NONE runs off the end of the function without returning a value.
 *
 * Returns: TERM_SEQ_NONE for every action visible in this block.
 */
static int parser_transition(term_parser *parser, uint32_t raw, unsigned int state, unsigned int action) {
        if (state != STATE_NONE)
                parser->state = state;

        switch (action) {
        case ACTION_NONE:
                return TERM_SEQ_NONE;
        case ACTION_CLEAR:
                return TERM_SEQ_NONE;
        /* NOTE(review): no per-action handler is invoked for the following
         * actions in the visible code (@raw is unused); confirm whether
         * handler calls are missing here. */
        case ACTION_IGNORE:
        case ACTION_PRINT:
        case ACTION_EXECUTE:
        case ACTION_COLLECT:
                return TERM_SEQ_NONE;
        case ACTION_PARAM:
                return TERM_SEQ_NONE;
        case ACTION_ESC_DISPATCH:
        case ACTION_CSI_DISPATCH:
                /* fall through; these share the return below in the visible code */
        case ACTION_DCS_START:
                /* not implemented */
                return TERM_SEQ_NONE;
        case ACTION_DCS_COLLECT:
                /* not implemented */
                return TERM_SEQ_NONE;
        case ACTION_DCS_CONSUME:
                /* not implemented */
                return TERM_SEQ_NONE;
        case ACTION_DCS_DISPATCH:
                /* not implemented */
                return TERM_SEQ_NONE;
        case ACTION_OSC_START:
                /* not implemented */
                return TERM_SEQ_NONE;
        case ACTION_OSC_COLLECT:
                /* not implemented */
                return TERM_SEQ_NONE;
        case ACTION_OSC_CONSUME:
                /* not implemented */
                return TERM_SEQ_NONE;
        case ACTION_OSC_DISPATCH:
                /* not implemented */
                return TERM_SEQ_NONE;
        default:
                assert_not_reached("invalid vte-parser action");
                return TERM_SEQ_NONE;
        }
}
case STATE_NONE:
/*
* During initialization, parser->state is cleared. Treat this
* as STATE_GROUND. We will then never get to STATE_NONE again.
*/
case STATE_GROUND:
switch (raw) {
case 0x00 ... 0x1f: /* C0 */
case 0x80 ... 0x9b: /* C1 \ { ST } */
case 0x9d ... 0x9f:
case 0x9c: /* ST */
}
case STATE_ESC:
switch (raw) {
case 0x00 ... 0x1f: /* C0 */
case 0x20 ... 0x2f: /* [' ' - '\'] */
case 0x30 ... 0x4f: /* ['0' - '~'] \ { 'P', 'X', '[', ']', '^', '_' } */
case 0x51 ... 0x57:
case 0x59 ... 0x5a:
case 0x5c:
case 0x60 ... 0x7e:
case 0x50: /* 'P' */
case 0x5b: /* '[' */
case 0x5d: /* ']' */
case 0x58: /* 'X' */
case 0x5e: /* '^' */
case 0x5f: /* '_' */
case 0x7f: /* DEL */
case 0x9c: /* ST */
}
case STATE_ESC_INT:
switch (raw) {
case 0x00 ... 0x1f: /* C0 */
case 0x20 ... 0x2f: /* [' ' - '\'] */
case 0x30 ... 0x7e: /* ['0' - '~'] */
case 0x7f: /* DEL */
case 0x9c: /* ST */
}
case STATE_CSI_ENTRY:
switch (raw) {
case 0x00 ... 0x1f: /* C0 */
case 0x20 ... 0x2f: /* [' ' - '\'] */
case 0x3a: /* ':' */
case 0x30 ... 0x39: /* ['0' - '9'] */
case 0x3b: /* ';' */
case 0x3c ... 0x3f: /* ['<' - '?'] */
case 0x40 ... 0x7e: /* ['@' - '~'] */
case 0x7f: /* DEL */
case 0x9c: /* ST */
}
case STATE_CSI_PARAM:
switch (raw) {
case 0x00 ... 0x1f: /* C0 */
case 0x20 ... 0x2f: /* [' ' - '\'] */
case 0x30 ... 0x39: /* ['0' - '9'] */
case 0x3b: /* ';' */
case 0x3a: /* ':' */
case 0x3c ... 0x3f: /* ['<' - '?'] */
case 0x40 ... 0x7e: /* ['@' - '~'] */
case 0x7f: /* DEL */
case 0x9c: /* ST */
}
case STATE_CSI_INT:
switch (raw) {
case 0x00 ... 0x1f: /* C0 */
case 0x20 ... 0x2f: /* [' ' - '\'] */
case 0x30 ... 0x3f: /* ['0' - '?'] */
case 0x40 ... 0x7e: /* ['@' - '~'] */
case 0x7f: /* DEL */
case 0x9c: /* ST */
}
case STATE_CSI_IGNORE:
switch (raw) {
case 0x00 ... 0x1f: /* C0 */
case 0x20 ... 0x3f: /* [' ' - '?'] */
case 0x40 ... 0x7e: /* ['@' - '~'] */
case 0x7f: /* DEL */
case 0x9c: /* ST */
}
case STATE_DCS_ENTRY:
switch (raw) {
case 0x00 ... 0x1f: /* C0 */
case 0x20 ... 0x2f: /* [' ' - '\'] */
case 0x3a: /* ':' */
case 0x30 ... 0x39: /* ['0' - '9'] */
case 0x3b: /* ';' */
case 0x3c ... 0x3f: /* ['<' - '?'] */
case 0x40 ... 0x7e: /* ['@' - '~'] */
case 0x7f: /* DEL */
case 0x9c: /* ST */
}
case STATE_DCS_PARAM:
switch (raw) {
case 0x00 ... 0x1f: /* C0 */
case 0x20 ... 0x2f: /* [' ' - '\'] */
case 0x30 ... 0x39: /* ['0' - '9'] */
case 0x3b: /* ';' */
case 0x3a: /* ':' */
case 0x3c ... 0x3f: /* ['<' - '?'] */
case 0x40 ... 0x7e: /* ['@' - '~'] */
case 0x7f: /* DEL */
case 0x9c: /* ST */
}
case STATE_DCS_INT:
switch (raw) {
case 0x00 ... 0x1f: /* C0 */
case 0x20 ... 0x2f: /* [' ' - '\'] */
case 0x30 ... 0x3f: /* ['0' - '?'] */
case 0x40 ... 0x7e: /* ['@' - '~'] */
case 0x7f: /* DEL */
case 0x9c: /* ST */
}
case STATE_DCS_PASS:
switch (raw) {
case 0x00 ... 0x7e: /* ASCII \ { DEL } */
case 0x7f: /* DEL */
case 0x9c: /* ST */
}
case STATE_DCS_IGNORE:
switch (raw) {
case 0x00 ... 0x7f: /* ASCII */
case 0x9c: /* ST */
}
case STATE_OSC_STRING:
switch (raw) {
case 0x00 ... 0x06: /* C0 \ { BEL } */
case 0x08 ... 0x1f:
case 0x20 ... 0x7f: /* [' ' - DEL] */
case 0x07: /* BEL */
case 0x9c: /* ST */
}
case STATE_ST_IGNORE:
switch (raw) {
case 0x00 ... 0x7f: /* ASCII */
case 0x9c: /* ST */
}
}
assert_not_reached("bad vte-parser state");
return -EINVAL;
}
int r;
/*
* Notes:
* * DEC treats GR codes as GL. We don't do that as we require UTF-8
* as charset and, thus, it doesn't make sense to treat GR special.
* * During control sequences, unexpected C1 codes cancel the sequence
* and immediately start a new one. C0 codes, however, may or may not
* be ignored/executed depending on the sequence.
*/
switch (raw) {
case 0x18: /* CAN */
break;
case 0x1a: /* SUB */
break;
case 0x80 ... 0x8f: /* C1 \ {DCS, SOS, CSI, ST, OSC, PM, APC} */
case 0x91 ... 0x97:
case 0x99 ... 0x9a:
break;
case 0x1b: /* ESC */
break;
case 0x98: /* SOS */
case 0x9e: /* PM */
case 0x9f: /* APC */
break;
case 0x90: /* DCS */
break;
case 0x9d: /* OSC */
break;
case 0x9b: /* CSI */
break;
default:
break;
}
if (r <= 0)
else
return r;
}