1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 * This file contains a bunch of UTF-8 helpers and the main ctlseq-parser. The
25 * parser is a simple state-machine that correctly parses all CSI, DCS, OSC, ST
26 * control sequences and generic escape sequences.
27 * The parser itself does not perform any actions but lets the caller react to
35 #include "term-internal.h"
39 * term_utf8_encode() - Encode single UCS-4 character as UTF-8
40 * @out_utf8: output buffer of at least 4 bytes or NULL
41 * @g: UCS-4 character to encode
43 * This encodes a single UCS-4 character as UTF-8 and writes it into @out_utf8.
44 * The length of the character is returned. It is not zero-terminated! If the
45 * output buffer is NULL, only the length is returned.
47 * Returns: The length in bytes that the UTF-8 representation does or would
50 size_t term_utf8_encode(char *out_utf8, uint32_t g) {
53 out_utf8[0] = g & 0x7f;
55 } else if (g < (1 << 11)) {
57 out_utf8[0] = 0xc0 | ((g >> 6) & 0x1f);
58 out_utf8[1] = 0x80 | (g & 0x3f);
61 } else if (g < (1 << 16)) {
63 out_utf8[0] = 0xe0 | ((g >> 12) & 0x0f);
64 out_utf8[1] = 0x80 | ((g >> 6) & 0x3f);
65 out_utf8[2] = 0x80 | (g & 0x3f);
68 } else if (g < (1 << 21)) {
70 out_utf8[0] = 0xf0 | ((g >> 18) & 0x07);
71 out_utf8[1] = 0x80 | ((g >> 12) & 0x3f);
72 out_utf8[2] = 0x80 | ((g >> 6) & 0x3f);
73 out_utf8[3] = 0x80 | (g & 0x3f);
82 * term_utf8_decode() - Try decoding the next UCS-4 character
83 * @p: decoder object to operate on or NULL
84 * @out_len: output buffer for length of decoded UCS-4 string or NULL
85 * @c: next char to push into decoder
87 * This decodes a UTF-8 stream. It must be called for each input-byte of the
88 * UTF-8 stream and returns a UCS-4 stream. The length of the returned UCS-4
89 * string (number of parsed characters) is stored in @out_len if non-NULL. A
90 * pointer to the string is returned (or NULL if none was parsed). The string
91 * is not zero-terminated! Furthermore, the string is only valid until the next
92 * invocation of this function. It is also bound to the parser-state @p.
94 * This function is highly optimized to work with terminal-emulators. Instead
95 * of being strict about UTF-8 validity, this tries to perform a fallback to
96 * ISO-8859-1 in case a wrong series was detected. Therefore, this function
97 * might return multiple UCS-4 characters by parsing just a single UTF-8 byte.
99 * The parser state @p should be allocated and managed by the caller. There're
100 * no helpers to do that for you. To initialize it, simply reset it to all
101 * zero. You can reset or free the object at any point in time.
103 * Returns: Pointer to the UCS-4 string or NULL.
105 const uint32_t *term_utf8_decode(term_utf8 *p, size_t *out_len, char c) {
106 uint32_t t, *res = NULL;
115 if (!p->valid || p->i_bytes >= p->n_bytes) {
117 * If the previous sequence was invalid or fully parsed, start
118 * parsing a fresh new sequence.
121 if ((byte & 0xE0) == 0xC0) {
122 /* start of two byte sequence */
127 } else if ((byte & 0xF0) == 0xE0) {
128 /* start of three byte sequence */
133 } else if ((byte & 0xF8) == 0xF0) {
134 /* start of four byte sequence */
141 * - single ASCII 7-bit char
142 * - out-of-sync continuation byte
143 * - overlong encoding
144 * All of them are treated as single byte ISO-8859-1 */
152 p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
155 * ..otherwise, try to continue the previous sequence..
158 if ((byte & 0xC0) == 0x80) {
160 * Valid continuation byte. Append to sequence and
161 * update the ucs4 cache accordingly.
165 p->chars[p->i_bytes++] = byte;
166 p->ucs4 |= t << (6 * (p->n_bytes - p->i_bytes));
169 * Invalid continuation? Treat cached sequence as
170 * ISO-8859-1, but parse the new char as valid new
171 * starting character. If it's a new single-byte UTF-8
172 * sequence, we immediately return it in the same run,
173 * otherwise, we might suffer from starvation.
176 if ((byte & 0xE0) == 0xC0 ||
177 (byte & 0xF0) == 0xE0 ||
178 (byte & 0xF8) == 0xF0) {
180 * New multi-byte sequence. Move to-be-returned
181 * data at the end and start new sequence. Only
182 * return the old sequence.
185 memmove(p->chars + 1,
187 sizeof(*p->chars) * p->i_bytes);
191 if ((byte & 0xE0) == 0xC0) {
192 /* start of two byte sequence */
197 } else if ((byte & 0xF0) == 0xE0) {
198 /* start of three byte sequence */
203 } else if ((byte & 0xF8) == 0xF0) {
204 /* start of four byte sequence */
210 assert_not_reached("Should not happen");
213 p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
218 * New single byte sequence, append to output
219 * and return combined sequence.
222 p->chars[p->i_bytes++] = byte;
229 * Check whether a full sequence (valid or invalid) has been parsed and
230 * then return it. Otherwise, return nothing.
233 /* still parsing? then bail out */
234 if (p->i_bytes < p->n_bytes)
251 return len > 0 ? res : NULL;
256 * The ctl-seq parser "term_parser" only detects whole sequences, it does not
257 * detect the specific command. Once a sequence is parsed, the command-parsers
258 * are used to figure out their meaning. Note that this depends on whether we
259 * run on the host or terminal side.
262 static unsigned int term_parse_host_control(const term_seq *seq) {
263 assert_return(seq, TERM_CMD_NONE);
265 switch (seq->terminator) {
267 return TERM_CMD_NULL;
293 /* this is already handled by the state-machine */
298 /* this is already handled by the state-machine */
301 /* this is already handled by the state-machine */
316 /* this is already handled by the state-machine */
323 /* this is already handled by the state-machine */
325 case 0x9a: /* DECID */
326 return TERM_CMD_DECID;
328 /* this is already handled by the state-machine */
333 /* this is already handled by the state-machine */
336 /* this is already handled by the state-machine */
339 /* this is already handled by the state-machine */
343 return TERM_CMD_NONE;
346 static inline int charset_from_cmd(uint32_t raw, unsigned int flags, bool require_96) {
347 static const struct {
351 /* 96-compat charsets */
352 [TERM_CHARSET_ISO_LATIN1_SUPPLEMENTAL] = { .raw = 'A', .flags = 0 },
353 [TERM_CHARSET_ISO_LATIN2_SUPPLEMENTAL] = { .raw = 'B', .flags = 0 },
354 [TERM_CHARSET_ISO_LATIN5_SUPPLEMENTAL] = { .raw = 'M', .flags = 0 },
355 [TERM_CHARSET_ISO_GREEK_SUPPLEMENTAL] = { .raw = 'F', .flags = 0 },
356 [TERM_CHARSET_ISO_HEBREW_SUPPLEMENTAL] = { .raw = 'H', .flags = 0 },
357 [TERM_CHARSET_ISO_LATIN_CYRILLIC] = { .raw = 'L', .flags = 0 },
359 /* 94-compat charsets */
360 [TERM_CHARSET_DEC_SPECIAL_GRAPHIC] = { .raw = '0', .flags = 0 },
361 [TERM_CHARSET_DEC_SUPPLEMENTAL] = { .raw = '5', .flags = TERM_SEQ_FLAG_PERCENT },
362 [TERM_CHARSET_DEC_TECHNICAL] = { .raw = '>', .flags = 0 },
363 [TERM_CHARSET_CYRILLIC_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_AND },
364 [TERM_CHARSET_DUTCH_NRCS] = { .raw = '4', .flags = 0 },
365 [TERM_CHARSET_FINNISH_NRCS] = { .raw = '5', .flags = 0 },
366 [TERM_CHARSET_FRENCH_NRCS] = { .raw = 'R', .flags = 0 },
367 [TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = '9', .flags = 0 },
368 [TERM_CHARSET_GERMAN_NRCS] = { .raw = 'K', .flags = 0 },
369 [TERM_CHARSET_GREEK_DEC] = { .raw = '?', .flags = TERM_SEQ_FLAG_DQUOTE },
370 [TERM_CHARSET_GREEK_NRCS] = { .raw = '>', .flags = TERM_SEQ_FLAG_DQUOTE },
371 [TERM_CHARSET_HEBREW_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_DQUOTE },
372 [TERM_CHARSET_HEBREW_NRCS] = { .raw = '=', .flags = TERM_SEQ_FLAG_PERCENT },
373 [TERM_CHARSET_ITALIAN_NRCS] = { .raw = 'Y', .flags = 0 },
374 [TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '`', .flags = 0 },
375 [TERM_CHARSET_PORTUGUESE_NRCS] = { .raw = '6', .flags = TERM_SEQ_FLAG_PERCENT },
376 [TERM_CHARSET_RUSSIAN_NRCS] = { .raw = '5', .flags = TERM_SEQ_FLAG_AND },
377 [TERM_CHARSET_SCS_NRCS] = { .raw = '3', .flags = TERM_SEQ_FLAG_PERCENT },
378 [TERM_CHARSET_SPANISH_NRCS] = { .raw = 'Z', .flags = 0 },
379 [TERM_CHARSET_SWEDISH_NRCS] = { .raw = '7', .flags = 0 },
380 [TERM_CHARSET_SWISS_NRCS] = { .raw = '=', .flags = 0 },
381 [TERM_CHARSET_TURKISH_DEC] = { .raw = '0', .flags = TERM_SEQ_FLAG_PERCENT },
382 [TERM_CHARSET_TURKISH_NRCS] = { .raw = '2', .flags = TERM_SEQ_FLAG_PERCENT },
384 /* special charsets */
385 [TERM_CHARSET_USERPREF_SUPPLEMENTAL] = { .raw = '<', .flags = 0 },
387 /* secondary choices */
388 [TERM_CHARSET_CNT + TERM_CHARSET_FINNISH_NRCS] = { .raw = 'C', .flags = 0 },
389 [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_NRCS] = { .raw = 'f', .flags = 0 },
390 [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = 'Q', .flags = 0 },
391 [TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = 'E', .flags = 0 },
392 [TERM_CHARSET_CNT + TERM_CHARSET_SWEDISH_NRCS] = { .raw = 'H', .flags = 0 }, /* unused; conflicts with ISO_HEBREW */
394 /* tertiary choices */
395 [TERM_CHARSET_CNT + TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '6', .flags = 0 },
400 * Secondary choice on SWEDISH_NRCS and primary choice on
401 * ISO_HEBREW_SUPPLEMENTAL have a conflict: raw=="H", flags==0.
402 * We always choose the ISO 96-compat set, which is what VT510 does.
405 for (i = 0; i < ELEMENTSOF(charset_cmds); ++i) {
406 if (charset_cmds[i].raw == raw && charset_cmds[i].flags == flags) {
408 while (cs >= TERM_CHARSET_CNT)
409 cs -= TERM_CHARSET_CNT;
411 if (!require_96 || cs < TERM_CHARSET_96_CNT || cs >= TERM_CHARSET_94_CNT)
/*
 * Returns true iff @value is a power of two, i.e. exactly one of its bits
 * is set. Zero has no bit set and therefore yields false.
 */
static inline bool exactly_one_bit_set(unsigned int value) {
        /* clearing the lowest set bit must leave nothing behind */
        return value != 0 && (value & (value - 1)) == 0;
}
424 static unsigned int term_parse_host_escape(const term_seq *seq, unsigned int *cs_out) {
425 unsigned int t, flags;
428 assert_return(seq, TERM_CMD_NONE);
430 flags = seq->intermediates;
431 t = TERM_SEQ_FLAG_POPEN | TERM_SEQ_FLAG_PCLOSE | TERM_SEQ_FLAG_MULT |
432 TERM_SEQ_FLAG_PLUS | TERM_SEQ_FLAG_MINUS | TERM_SEQ_FLAG_DOT |
435 if (exactly_one_bit_set(flags & t)) {
437 case TERM_SEQ_FLAG_POPEN:
438 case TERM_SEQ_FLAG_PCLOSE:
439 case TERM_SEQ_FLAG_MULT:
440 case TERM_SEQ_FLAG_PLUS:
441 cs = charset_from_cmd(seq->terminator, flags & ~t, false);
443 case TERM_SEQ_FLAG_MINUS:
444 case TERM_SEQ_FLAG_DOT:
445 case TERM_SEQ_FLAG_SLASH:
446 cs = charset_from_cmd(seq->terminator, flags & ~t, true);
459 /* looked like a charset-cmd but wasn't; continue */
462 switch (seq->terminator) {
464 if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL top-half */
465 return TERM_CMD_DECDHL_TH;
468 if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL bottom-half */
469 return TERM_CMD_DECDHL_BH;
472 if (flags == TERM_SEQ_FLAG_HASH) /* DECSWL */
473 return TERM_CMD_DECSWL;
476 if (flags == 0) /* DECBI */
477 return TERM_CMD_DECBI;
478 else if (flags == TERM_SEQ_FLAG_HASH) /* DECDWL */
479 return TERM_CMD_DECDWL;
482 if (flags == 0) /* DECSC */
483 return TERM_CMD_DECSC;
486 if (flags == 0) /* DECRC */
487 return TERM_CMD_DECRC;
488 else if (flags == TERM_SEQ_FLAG_HASH) /* DECALN */
489 return TERM_CMD_DECALN;
492 if (flags == 0) /* DECFI */
493 return TERM_CMD_DECFI;
496 if (flags == 0) /* DECANM */
497 return TERM_CMD_DECANM;
500 if (flags == 0) /* DECKPAM */
501 return TERM_CMD_DECKPAM;
504 if (flags == 0) /* DECKPNM */
505 return TERM_CMD_DECKPNM;
508 if (flags == TERM_SEQ_FLAG_PERCENT) {
509 /* Select default character set */
510 return TERM_CMD_XTERM_SDCS;
514 if (flags == 0) /* IND */
518 if (flags == 0) /* NEL */
522 if (flags == 0) /* Cursor to lower-left corner of screen */
523 return TERM_CMD_XTERM_CLLHP;
524 else if (flags == TERM_SEQ_FLAG_SPACE) /* S7C1T */
525 return TERM_CMD_S7C1T;
528 if (flags == TERM_SEQ_FLAG_SPACE) { /* S8C1T */
529 return TERM_CMD_S8C1T;
530 } else if (flags == TERM_SEQ_FLAG_PERCENT) {
531 /* Select UTF-8 character set */
532 return TERM_CMD_XTERM_SUCS;
536 if (flags == 0) /* HTS */
540 if (flags == TERM_SEQ_FLAG_SPACE) {
541 /* Set ANSI conformance level 1 */
542 return TERM_CMD_XTERM_SACL1;
546 if (flags == 0) { /* RI */
548 } else if (flags == TERM_SEQ_FLAG_SPACE) {
549 /* Set ANSI conformance level 2 */
550 return TERM_CMD_XTERM_SACL2;
554 if (flags == 0) { /* SS2 */
556 } else if (flags == TERM_SEQ_FLAG_SPACE) {
557 /* Set ANSI conformance level 3 */
558 return TERM_CMD_XTERM_SACL3;
562 if (flags == 0) /* SS3 */
566 if (flags == 0) /* DCS: this is already handled by the state-machine */
570 if (flags == 0) /* SPA */
574 if (flags == 0) /* EPA */
578 if (flags == 0) { /* SOS */
579 /* this is already handled by the state-machine */
584 if (flags == 0) /* DECID */
585 return TERM_CMD_DECID;
588 if (flags == 0) { /* CSI */
589 /* this is already handled by the state-machine */
594 if (flags == 0) /* ST */
598 if (flags == 0) { /* OSC */
599 /* this is already handled by the state-machine */
604 if (flags == 0) { /* PM */
605 /* this is already handled by the state-machine */
610 if (flags == 0) { /* APC */
611 /* this is already handled by the state-machine */
616 if (flags == 0) /* RIS */
620 if (flags == 0) /* Memory lock */
621 return TERM_CMD_XTERM_MLHP;
624 if (flags == 0) /* Memory unlock */
625 return TERM_CMD_XTERM_MUHP;
628 if (flags == 0) /* LS2 */
632 if (flags == 0) /* LS3 */
636 if (flags == 0) /* LS3R */
637 return TERM_CMD_LS3R;
640 if (flags == 0) /* LS2R */
641 return TERM_CMD_LS2R;
644 if (flags == 0) /* LS1R */
645 return TERM_CMD_LS1R;
649 return TERM_CMD_NONE;
652 static unsigned int term_parse_host_csi(const term_seq *seq) {
655 assert_return(seq, TERM_CMD_NONE);
657 flags = seq->intermediates;
659 switch (seq->terminator) {
661 if (flags == 0) /* CUU */
665 if (flags == 0) /* HPR */
669 if (flags == 0) /* CUD */
673 if (flags == 0) /* REP */
677 if (flags == 0) /* CUF */
681 if (flags == 0) /* DA1 */
683 else if (flags == TERM_SEQ_FLAG_GT) /* DA2 */
685 else if (flags == TERM_SEQ_FLAG_EQUAL) /* DA3 */
689 if (flags == 0) /* CUB */
693 if (flags == 0) /* VPA */
697 if (flags == 0) /* CNL */
701 if (flags == 0) /* VPR */
705 if (flags == 0) /* CPL */
709 if (flags == 0) /* HVP */
713 if (flags == 0) /* CHA */
717 if (flags == 0) /* TBC */
719 else if (flags == TERM_SEQ_FLAG_MULT) /* DECLFKC */
720 return TERM_CMD_DECLFKC;
723 if (flags == 0) /* CUP */
727 if (flags == 0) /* SM ANSI */
728 return TERM_CMD_SM_ANSI;
729 else if (flags == TERM_SEQ_FLAG_WHAT) /* SM DEC */
730 return TERM_CMD_SM_DEC;
733 if (flags == 0) /* CHT */
737 if (flags == 0) /* MC ANSI */
738 return TERM_CMD_MC_ANSI;
739 else if (flags == TERM_SEQ_FLAG_WHAT) /* MC DEC */
740 return TERM_CMD_MC_DEC;
743 if (flags == 0) /* ED */
745 else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSED */
746 return TERM_CMD_DECSED;
749 if (flags == 0) /* EL */
751 else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSEL */
752 return TERM_CMD_DECSEL;
755 if (flags == 0) /* IL */
759 if (flags == 0) /* RM ANSI */
760 return TERM_CMD_RM_ANSI;
761 else if (flags == TERM_SEQ_FLAG_WHAT) /* RM DEC */
762 return TERM_CMD_RM_DEC;
765 if (flags == 0) /* DL */
769 if (flags == 0) /* SGR */
771 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SMR */
772 return TERM_CMD_XTERM_SRV;
775 if (flags == 0) /* DSR ANSI */
776 return TERM_CMD_DSR_ANSI;
777 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM RMR */
778 return TERM_CMD_XTERM_RRV;
779 else if (flags == TERM_SEQ_FLAG_WHAT) /* DSR DEC */
780 return TERM_CMD_DSR_DEC;
783 if (flags == 0) /* DCH */
785 else if (flags == TERM_SEQ_FLAG_SPACE) /* PPA */
789 if (flags == 0) /* DECSSL */
790 return TERM_CMD_DECSSL;
791 else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSSCLS */
792 return TERM_CMD_DECSSCLS;
793 else if (flags == TERM_SEQ_FLAG_BANG) /* DECSTR */
794 return TERM_CMD_DECSTR;
795 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCL */
796 return TERM_CMD_DECSCL;
797 else if (flags == TERM_SEQ_FLAG_CASH) /* DECRQM-ANSI */
798 return TERM_CMD_DECRQM_ANSI;
799 else if (flags == (TERM_SEQ_FLAG_CASH | TERM_SEQ_FLAG_WHAT)) /* DECRQM-DEC */
800 return TERM_CMD_DECRQM_DEC;
801 else if (flags == TERM_SEQ_FLAG_PCLOSE) /* DECSDPT */
802 return TERM_CMD_DECSDPT;
803 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSPPCS */
804 return TERM_CMD_DECSPPCS;
805 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSR */
806 return TERM_CMD_DECSR;
807 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECLTOD */
808 return TERM_CMD_DECLTOD;
809 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SPM */
810 return TERM_CMD_XTERM_SPM;
813 if (flags == TERM_SEQ_FLAG_SPACE) /* PPR */
817 if (flags == 0) /* DECLL */
818 return TERM_CMD_DECLL;
819 else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSCUSR */
820 return TERM_CMD_DECSCUSR;
821 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCA */
822 return TERM_CMD_DECSCA;
823 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSDDT */
824 return TERM_CMD_DECSDDT;
825 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSRC */
826 return TERM_CMD_DECSR;
827 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECELF */
828 return TERM_CMD_DECELF;
829 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECTID */
830 return TERM_CMD_DECTID;
833 if (flags == TERM_SEQ_FLAG_SPACE) /* PPB */
839 return TERM_CMD_DECSTBM;
840 } else if (flags == TERM_SEQ_FLAG_SPACE) {
842 return TERM_CMD_DECSKCV;
843 } else if (flags == TERM_SEQ_FLAG_CASH) {
845 return TERM_CMD_DECCARA;
846 } else if (flags == TERM_SEQ_FLAG_MULT) {
848 return TERM_CMD_DECSCS;
849 } else if (flags == TERM_SEQ_FLAG_PLUS) {
851 return TERM_CMD_DECSMKR;
852 } else if (flags == TERM_SEQ_FLAG_WHAT) {
854 * There's a conflict between DECPCTERM and XTERM-RPM.
855 * XTERM-RPM takes a single argument, DECPCTERM takes 2.
856 * Split both up and forward the call to the closer
859 if (seq->n_args <= 1) /* XTERM RPM */
860 return TERM_CMD_XTERM_RPM;
861 else if (seq->n_args >= 2) /* DECPCTERM */
862 return TERM_CMD_DECPCTERM;
866 if (flags == 0) /* SU */
868 else if (flags == TERM_SEQ_FLAG_WHAT) /* XTERM SGFX */
869 return TERM_CMD_XTERM_SGFX;
874 * There's a conflict between DECSLRM and SC-ANSI which
875 * cannot be resolved without knowing the state of
876 * DECLRMM. We leave that decision up to the caller.
878 return TERM_CMD_DECSLRM_OR_SC;
879 } else if (flags == TERM_SEQ_FLAG_CASH) {
881 return TERM_CMD_DECSPRTT;
882 } else if (flags == TERM_SEQ_FLAG_MULT) {
884 return TERM_CMD_DECSFC;
885 } else if (flags == TERM_SEQ_FLAG_WHAT) {
887 return TERM_CMD_XTERM_SPM;
893 * Awesome: There's a conflict between SD and XTERM IHMT
894 * that we have to resolve by checking the parameter
895 * count.. XTERM_IHMT needs exactly 5 arguments, SD
896 * takes 0 or 1. We're conservative here and give both
897 * a wider range to allow unused arguments (compat...).
899 if (seq->n_args >= 5) {
901 return TERM_CMD_XTERM_IHMT;
902 } else if (seq->n_args < 5) {
906 } else if (flags == TERM_SEQ_FLAG_GT) {
908 return TERM_CMD_XTERM_RTM;
913 if (seq->n_args > 0 && seq->args[0] < 24) {
915 return TERM_CMD_XTERM_WM;
918 return TERM_CMD_DECSLPP;
920 } else if (flags == TERM_SEQ_FLAG_SPACE) {
922 return TERM_CMD_DECSWBV;
923 } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
925 return TERM_CMD_DECSRFR;
926 } else if (flags == TERM_SEQ_FLAG_CASH) {
928 return TERM_CMD_DECRARA;
929 } else if (flags == TERM_SEQ_FLAG_GT) {
931 return TERM_CMD_XTERM_STM;
935 if (flags == 0) /* NP */
942 } else if (flags == TERM_SEQ_FLAG_SPACE) {
944 return TERM_CMD_DECSMBV;
945 } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
947 return TERM_CMD_DECSTRL;
948 } else if (flags == TERM_SEQ_FLAG_WHAT) {
950 return TERM_CMD_DECRQUPSS;
951 } else if (seq->args[0] == 1 && flags == TERM_SEQ_FLAG_CASH) {
953 return TERM_CMD_DECRQTSR;
954 } else if (flags == TERM_SEQ_FLAG_MULT) {
956 return TERM_CMD_DECSCP;
957 } else if (flags == TERM_SEQ_FLAG_COMMA) {
959 return TERM_CMD_DECRQKT;
963 if (flags == 0) /* PP */
967 if (flags == TERM_SEQ_FLAG_SPACE) /* DECSLCK */
968 return TERM_CMD_DECSLCK;
969 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECRQDE */
970 return TERM_CMD_DECRQDE;
971 else if (flags == TERM_SEQ_FLAG_CASH) /* DECCRA */
972 return TERM_CMD_DECCRA;
973 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECRPKT */
974 return TERM_CMD_DECRPKT;
977 if (seq->args[0] == 5 && flags == TERM_SEQ_FLAG_WHAT) {
979 return TERM_CMD_DECST8C;
983 if (flags == TERM_SEQ_FLAG_CASH) /* DECRQPSR */
984 return TERM_CMD_DECRQPSR;
985 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECEFR */
986 return TERM_CMD_DECEFR;
987 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSPP */
988 return TERM_CMD_DECSPP;
991 if (flags == 0) /* ECH */
995 if (flags == 0) /* DECREQTPARM */
996 return TERM_CMD_DECREQTPARM;
997 else if (flags == TERM_SEQ_FLAG_CASH) /* DECFRA */
998 return TERM_CMD_DECFRA;
999 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSACE */
1000 return TERM_CMD_DECSACE;
1001 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECRQPKFM */
1002 return TERM_CMD_DECRQPKFM;
1005 if (flags == 0) /* DECTST */
1006 return TERM_CMD_DECTST;
1007 else if (flags == TERM_SEQ_FLAG_MULT) /* DECRQCRA */
1008 return TERM_CMD_DECRQCRA;
1009 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKFMR */
1010 return TERM_CMD_DECPKFMR;
1013 if (flags == 0) /* CBT */
1014 return TERM_CMD_CBT;
1017 if (flags == TERM_SEQ_FLAG_CASH) /* DECERA */
1018 return TERM_CMD_DECERA;
1019 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECELR */
1020 return TERM_CMD_DECELR;
1021 else if (flags == TERM_SEQ_FLAG_MULT) /* DECINVM */
1022 return TERM_CMD_DECINVM;
1023 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKA */
1024 return TERM_CMD_DECPKA;
1027 if (flags == 0) /* ICH */
1028 return TERM_CMD_ICH;
1031 if (flags == 0) /* HPA */
1032 return TERM_CMD_HPA;
1035 if (flags == TERM_SEQ_FLAG_CASH) /* DECSERA */
1036 return TERM_CMD_DECSERA;
1037 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECSLE */
1038 return TERM_CMD_DECSLE;
1041 if (flags == TERM_SEQ_FLAG_CASH) /* DECSCPP */
1042 return TERM_CMD_DECSCPP;
1043 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECRQLP */
1044 return TERM_CMD_DECRQLP;
1045 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSNLS */
1046 return TERM_CMD_DECSNLS;
1049 if (flags == TERM_SEQ_FLAG_SPACE) /* DECKBD */
1050 return TERM_CMD_DECKBD;
1051 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSASD */
1052 return TERM_CMD_DECSASD;
1053 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECIC */
1054 return TERM_CMD_DECIC;
1057 if (flags == TERM_SEQ_FLAG_SPACE) /* DECTME */
1058 return TERM_CMD_DECTME;
1059 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSSDT */
1060 return TERM_CMD_DECSSDT;
1061 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECDC */
1062 return TERM_CMD_DECDC;
1066 return TERM_CMD_NONE;
1071 * This parser controls the parser-state and returns any detected sequence to
1072 * the caller. The parser is based on this state-diagram from Paul Williams:
1073 * http://vt100.net/emu/
1074 * It was written from scratch and extended where needed.
1075 * This parser is fully compatible up to the vt500 series. We expect UCS-4 as
1076 * input. It's the caller's responsibility to do any UTF-8 parsing.
1080 STATE_NONE, /* placeholder */
1081 STATE_GROUND, /* initial state and ground */
1082 STATE_ESC, /* ESC sequence was started */
1083 STATE_ESC_INT, /* intermediate escape characters */
1084 STATE_CSI_ENTRY, /* starting CSI sequence */
1085 STATE_CSI_PARAM, /* CSI parameters */
1086 STATE_CSI_INT, /* intermediate CSI characters */
1087 STATE_CSI_IGNORE, /* CSI error; ignore this CSI sequence */
1088 STATE_DCS_ENTRY, /* starting DCS sequence */
1089 STATE_DCS_PARAM, /* DCS parameters */
1090 STATE_DCS_INT, /* intermediate DCS characters */
1091 STATE_DCS_PASS, /* DCS data passthrough */
1092 STATE_DCS_IGNORE, /* DCS error; ignore this DCS sequence */
1093 STATE_OSC_STRING, /* parsing OSC sequence */
1094 STATE_ST_IGNORE, /* unimplemented seq; ignore until ST */
1098 enum parser_action {
1099 ACTION_NONE, /* placeholder */
1100 ACTION_CLEAR, /* clear parameters */
1101 ACTION_IGNORE, /* ignore the character entirely */
1102 ACTION_PRINT, /* print the character on the console */
1103 ACTION_EXECUTE, /* execute single control character (C0/C1) */
1104 ACTION_COLLECT, /* collect intermediate character */
1105 ACTION_PARAM, /* collect parameter character */
1106 ACTION_ESC_DISPATCH, /* dispatch escape sequence */
1107 ACTION_CSI_DISPATCH, /* dispatch csi sequence */
1108 ACTION_DCS_START, /* start of DCS data */
1109 ACTION_DCS_COLLECT, /* collect DCS data */
1110 ACTION_DCS_CONSUME, /* consume DCS terminator */
1111 ACTION_DCS_DISPATCH, /* dispatch dcs sequence */
1112 ACTION_OSC_START, /* start of OSC data */
1113 ACTION_OSC_COLLECT, /* collect OSC data */
1114 ACTION_OSC_CONSUME, /* consume OSC terminator */
1115 ACTION_OSC_DISPATCH, /* dispatch osc sequence */
1119 int term_parser_new(term_parser **out, bool host) {
1120 _term_parser_free_ term_parser *parser = NULL;
1122 assert_return(out, -EINVAL);
1124 parser = new0(term_parser, 1);
1128 parser->is_host = host;
1129 parser->st_alloc = 64;
1130 parser->seq.st = new0(char, parser->st_alloc + 1);
1131 if (!parser->seq.st)
1139 term_parser *term_parser_free(term_parser *parser) {
1143 free(parser->seq.st);
1148 static inline void parser_clear(term_parser *parser) {
1151 parser->seq.command = TERM_CMD_NONE;
1152 parser->seq.terminator = 0;
1153 parser->seq.intermediates = 0;
1154 parser->seq.charset = TERM_CHARSET_NONE;
1155 parser->seq.n_args = 0;
1156 for (i = 0; i < TERM_PARSER_ARG_MAX; ++i)
1157 parser->seq.args[i] = -1;
1159 parser->seq.n_st = 0;
1160 parser->seq.st[0] = 0;
1163 static int parser_ignore(term_parser *parser, uint32_t raw) {
1164 parser_clear(parser);
1165 parser->seq.type = TERM_SEQ_IGNORE;
1166 parser->seq.command = TERM_CMD_NONE;
1167 parser->seq.terminator = raw;
1168 parser->seq.charset = TERM_CHARSET_NONE;
1170 return parser->seq.type;
1173 static int parser_print(term_parser *parser, uint32_t raw) {
1174 parser_clear(parser);
1175 parser->seq.type = TERM_SEQ_GRAPHIC;
1176 parser->seq.command = TERM_CMD_GRAPHIC;
1177 parser->seq.terminator = raw;
1178 parser->seq.charset = TERM_CHARSET_NONE;
1180 return parser->seq.type;
1183 static int parser_execute(term_parser *parser, uint32_t raw) {
1184 parser_clear(parser);
1185 parser->seq.type = TERM_SEQ_CONTROL;
1186 parser->seq.command = TERM_CMD_GRAPHIC;
1187 parser->seq.terminator = raw;
1188 parser->seq.charset = TERM_CHARSET_NONE;
1189 if (!parser->is_host)
1190 parser->seq.command = term_parse_host_control(&parser->seq);
1192 return parser->seq.type;
1195 static void parser_collect(term_parser *parser, uint32_t raw) {
1197 * Usually, characters from 0x30 to 0x3f are only allowed as leading
1198 * markers (or as part of the parameters), characters from 0x20 to 0x2f
1199 * are only allowed as trailing markers. However, our state-machine
1200 * already verifies those restrictions so we can handle them the same
1201 * way here. Note that we safely allow markers to be specified multiple
1205 if (raw >= 0x20 && raw <= 0x3f)
1206 parser->seq.intermediates |= 1 << (raw - 0x20);
1209 static void parser_param(term_parser *parser, uint32_t raw) {
1213 if (parser->seq.n_args < TERM_PARSER_ARG_MAX)
1214 ++parser->seq.n_args;
1219 if (parser->seq.n_args >= TERM_PARSER_ARG_MAX)
1222 if (raw >= '0' && raw <= '9') {
1223 new = parser->seq.args[parser->seq.n_args];
1226 new = new * 10 + raw - '0';
1228 /* VT510 tells us to clamp all values to [0, 9999], however, it
1229 * also allows commands with values up to 2^15-1. We simply use
1230 * 2^16 as maximum here to be compatible to all commands, but
1231 * avoid overflows in any calculations. */
1235 parser->seq.args[parser->seq.n_args] = new;
1239 static int parser_esc(term_parser *parser, uint32_t raw) {
1240 parser->seq.type = TERM_SEQ_ESCAPE;
1241 parser->seq.command = TERM_CMD_NONE;
1242 parser->seq.terminator = raw;
1243 parser->seq.charset = TERM_CHARSET_NONE;
1244 if (!parser->is_host)
1245 parser->seq.command = term_parse_host_escape(&parser->seq, &parser->seq.charset);
1247 return parser->seq.type;
1250 static int parser_csi(term_parser *parser, uint32_t raw) {
1251 /* parser->seq is cleared during CSI-ENTER state, thus there's no need
1252 * to clear invalid fields here. */
1254 if (parser->seq.n_args < TERM_PARSER_ARG_MAX) {
1255 if (parser->seq.n_args > 0 ||
1256 parser->seq.args[parser->seq.n_args] >= 0)
1257 ++parser->seq.n_args;
1260 parser->seq.type = TERM_SEQ_CSI;
1261 parser->seq.command = TERM_CMD_NONE;
1262 parser->seq.terminator = raw;
1263 parser->seq.charset = TERM_CHARSET_NONE;
1264 if (!parser->is_host)
1265 parser->seq.command = term_parse_host_csi(&parser->seq);
1267 return parser->seq.type;
1270 /* perform state transition and dispatch related actions */
1271 static int parser_transition(term_parser *parser, uint32_t raw, unsigned int state, unsigned int action) {
1272 if (state != STATE_NONE)
1273 parser->state = state;
1277 return TERM_SEQ_NONE;
1279 parser_clear(parser);
1280 return TERM_SEQ_NONE;
1282 return parser_ignore(parser, raw);
1284 return parser_print(parser, raw);
1285 case ACTION_EXECUTE:
1286 return parser_execute(parser, raw);
1287 case ACTION_COLLECT:
1288 parser_collect(parser, raw);
1289 return TERM_SEQ_NONE;
1291 parser_param(parser, raw);
1292 return TERM_SEQ_NONE;
1293 case ACTION_ESC_DISPATCH:
1294 return parser_esc(parser, raw);
1295 case ACTION_CSI_DISPATCH:
1296 return parser_csi(parser, raw);
1297 case ACTION_DCS_START:
1298 /* not implemented */
1299 return TERM_SEQ_NONE;
1300 case ACTION_DCS_COLLECT:
1301 /* not implemented */
1302 return TERM_SEQ_NONE;
1303 case ACTION_DCS_CONSUME:
1304 /* not implemented */
1305 return TERM_SEQ_NONE;
1306 case ACTION_DCS_DISPATCH:
1307 /* not implemented */
1308 return TERM_SEQ_NONE;
1309 case ACTION_OSC_START:
1310 /* not implemented */
1311 return TERM_SEQ_NONE;
1312 case ACTION_OSC_COLLECT:
1313 /* not implemented */
1314 return TERM_SEQ_NONE;
1315 case ACTION_OSC_CONSUME:
1316 /* not implemented */
1317 return TERM_SEQ_NONE;
1318 case ACTION_OSC_DISPATCH:
1319 /* not implemented */
1320 return TERM_SEQ_NONE;
1322 assert_not_reached("invalid vte-parser action");
1323 return TERM_SEQ_NONE;
/**
 * parser_feed_to_state() - Feed one character into the current parser state
 * @parser: parser object; parser->state selects which set of transitions applies
 * @raw: input character to classify
 *
 * Selects the branch for parser->state, classifies @raw by its numeric range
 * and hands the resulting target state and action to parser_transition().
 *
 * NOTE(review): STATE_NONE as target state presumably means "keep the current
 * state"; confirm against parser_transition().
 *
 * Returns: Whatever parser_transition() returns for the selected transition.
 */
1327 static int parser_feed_to_state(term_parser *parser, uint32_t raw) {
1328 switch (parser->state) {
1331 * During initialization, parser->state is cleared. Treat this
1332 * as STATE_GROUND. We will then never get to STATE_NONE again.
1336 case 0x00 ... 0x1f: /* C0 */
1337 case 0x80 ... 0x9b: /* C1 \ { ST } */
1339 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1341 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1344 return parser_transition(parser, raw, STATE_NONE, ACTION_PRINT);
1347 case 0x00 ... 0x1f: /* C0 */
1348 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1349 case 0x20 ... 0x2f: /* [' ' - '/'] */
1350 return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
1351 case 0x30 ... 0x4f: /* ['0' - '~'] \ { 'P', 'X', '[', ']', '^', '_' } */
1356 return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
/* 'P', '[' and ']' open a DCS, CSI or OSC sequence; ACTION_CLEAR is passed along with the new state */
1357 case 0x50: /* 'P' */
1358 return parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
1359 case 0x5b: /* '[' */
1360 return parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
1361 case 0x5d: /* ']' */
1362 return parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
/* 'X', '^' and '_' start strings that are handled by STATE_ST_IGNORE */
1363 case 0x58: /* 'X' */
1364 case 0x5e: /* '^' */
1365 case 0x5f: /* '_' */
1366 return parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
1367 case 0x7f: /* DEL */
1368 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1370 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1373 return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
1376 case 0x00 ... 0x1f: /* C0 */
1377 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1378 case 0x20 ... 0x2f: /* [' ' - '/'] */
1379 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1380 case 0x30 ... 0x7e: /* ['0' - '~'] */
1381 return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
1382 case 0x7f: /* DEL */
1383 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1385 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1388 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
/*
 * CSI entry: digits and ';' go to STATE_CSI_PARAM as parameters, '<'-'?' go
 * there via ACTION_COLLECT, ' '-'/' go to STATE_CSI_INT, and '@'-'~'
 * dispatches the sequence and returns to STATE_GROUND.
 */
1389 case STATE_CSI_ENTRY:
1391 case 0x00 ... 0x1f: /* C0 */
1392 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1393 case 0x20 ... 0x2f: /* [' ' - '/'] */
1394 return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
/* ':' is rejected here: the sequence is handed to STATE_CSI_IGNORE without any action */
1395 case 0x3a: /* ':' */
1396 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1397 case 0x30 ... 0x39: /* ['0' - '9'] */
1398 case 0x3b: /* ';' */
1399 return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_PARAM);
1400 case 0x3c ... 0x3f: /* ['<' - '?'] */
1401 return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_COLLECT);
1402 case 0x40 ... 0x7e: /* ['@' - '~'] */
1403 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1404 case 0x7f: /* DEL */
1405 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1407 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1410 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
/*
 * CSI parameters: digits and ';' are passed on with ACTION_PARAM. Unlike in
 * STATE_CSI_ENTRY, '<'-'?' is no longer accepted and, like ':', sends the
 * sequence to STATE_CSI_IGNORE.
 */
1411 case STATE_CSI_PARAM:
1413 case 0x00 ... 0x1f: /* C0 */
1414 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1415 case 0x20 ... 0x2f: /* [' ' - '/'] */
1416 return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
1417 case 0x30 ... 0x39: /* ['0' - '9'] */
1418 case 0x3b: /* ';' */
1419 return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
1420 case 0x3a: /* ':' */
1421 case 0x3c ... 0x3f: /* ['<' - '?'] */
1422 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1423 case 0x40 ... 0x7e: /* ['@' - '~'] */
1424 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1425 case 0x7f: /* DEL */
1426 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1428 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1431 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1434 case 0x00 ... 0x1f: /* C0 */
1435 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1436 case 0x20 ... 0x2f: /* [' ' - '/'] */
1437 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1438 case 0x30 ... 0x3f: /* ['0' - '?'] */
1439 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1440 case 0x40 ... 0x7e: /* ['@' - '~'] */
1441 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1442 case 0x7f: /* DEL */
1443 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1445 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1448 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
/*
 * Rejected CSI sequence: C0 controls still get ACTION_EXECUTE, ' '-'?' is
 * passed with ACTION_NONE, and '@'-'~' returns to STATE_GROUND with
 * ACTION_NONE instead of ACTION_CSI_DISPATCH.
 */
1449 case STATE_CSI_IGNORE:
1451 case 0x00 ... 0x1f: /* C0 */
1452 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1453 case 0x20 ... 0x3f: /* [' ' - '?'] */
1454 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1455 case 0x40 ... 0x7e: /* ['@' - '~'] */
1456 return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
1457 case 0x7f: /* DEL */
1458 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1460 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1463 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
/*
 * DCS entry: same character classes as STATE_CSI_ENTRY, but C0 controls get
 * ACTION_IGNORE instead of ACTION_EXECUTE and '@'-'~' moves on to
 * STATE_DCS_PASS with ACTION_DCS_CONSUME rather than dispatching.
 */
1464 case STATE_DCS_ENTRY:
1466 case 0x00 ... 0x1f: /* C0 */
1467 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1468 case 0x20 ... 0x2f: /* [' ' - '/'] */
1469 return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
1470 case 0x3a: /* ':' */
1471 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1472 case 0x30 ... 0x39: /* ['0' - '9'] */
1473 case 0x3b: /* ';' */
1474 return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_PARAM);
1475 case 0x3c ... 0x3f: /* ['<' - '?'] */
1476 return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_COLLECT);
1477 case 0x40 ... 0x7e: /* ['@' - '~'] */
1478 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1479 case 0x7f: /* DEL */
1480 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1482 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1485 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
/* DCS parameters: mirrors STATE_CSI_PARAM, with ':' and '<'-'?' leading to STATE_DCS_IGNORE */
1486 case STATE_DCS_PARAM:
1488 case 0x00 ... 0x1f: /* C0 */
1489 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1490 case 0x20 ... 0x2f: /* [' ' - '/'] */
1491 return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
1492 case 0x30 ... 0x39: /* ['0' - '9'] */
1493 case 0x3b: /* ';' */
1494 return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
1495 case 0x3a: /* ':' */
1496 case 0x3c ... 0x3f: /* ['<' - '?'] */
1497 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1498 case 0x40 ... 0x7e: /* ['@' - '~'] */
1499 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1500 case 0x7f: /* DEL */
1501 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1503 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1506 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1509 case 0x00 ... 0x1f: /* C0 */
1510 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1511 case 0x20 ... 0x2f: /* [' ' - '/'] */
1512 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1513 case 0x30 ... 0x3f: /* ['0' - '?'] */
1514 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1515 case 0x40 ... 0x7e: /* ['@' - '~'] */
1516 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1517 case 0x7f: /* DEL */
1518 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1520 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1523 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
/*
 * DCS passthrough: every ASCII character except DEL is handed over with
 * ACTION_DCS_COLLECT; the transition back to STATE_GROUND carries
 * ACTION_DCS_DISPATCH.
 */
1524 case STATE_DCS_PASS:
1526 case 0x00 ... 0x7e: /* ASCII \ { DEL } */
1527 return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
1528 case 0x7f: /* DEL */
1529 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1531 return parser_transition(parser, raw, STATE_GROUND, ACTION_DCS_DISPATCH);
1534 return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
/* Rejected DCS sequence: all ASCII input gets ACTION_IGNORE; leaving for STATE_GROUND dispatches nothing */
1535 case STATE_DCS_IGNORE:
1537 case 0x00 ... 0x7f: /* ASCII */
1538 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1540 return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
1543 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
/*
 * OSC string: ' ' through DEL is handed over with ACTION_OSC_COLLECT, the
 * listed C0 controls get ACTION_IGNORE, and BEL ends the string by returning
 * to STATE_GROUND with ACTION_OSC_DISPATCH.
 */
1544 case STATE_OSC_STRING:
1546 case 0x00 ... 0x06: /* C0 \ { BEL } */
1548 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1549 case 0x20 ... 0x7f: /* [' ' - DEL] */
1550 return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
1551 case 0x07: /* BEL */
1553 return parser_transition(parser, raw, STATE_GROUND, ACTION_OSC_DISPATCH);
1556 return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
/* Strings whose content is discarded: all ASCII input gets ACTION_IGNORE */
1557 case STATE_ST_IGNORE:
1559 case 0x00 ... 0x7f: /* ASCII */
1560 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1562 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1565 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
/* Every state branch visible above returns, so getting here means parser->state holds an unexpected value */
1568 assert_not_reached("bad vte-parser state");
1572 int term_parser_feed(term_parser *parser, const term_seq **seq_out, uint32_t raw) {
1575 assert_return(parser, -EINVAL);
1576 assert_return(seq_out, -EINVAL);
1580 * * DEC treats GR codes as GL. We don't do that as we require UTF-8
1581 * as charset and, thus, it doesn't make sense to treat GR special.
1582 * * During control sequences, unexpected C1 codes cancel the sequence
1583 * and immediately start a new one. C0 codes, however, may or may not
1584 * be ignored/executed depending on the sequence.
1588 case 0x18: /* CAN */
1589 r = parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1591 case 0x1a: /* SUB */
1592 r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
1594 case 0x80 ... 0x8f: /* C1 \ {DCS, SOS, CSI, ST, OSC, PM, APC} */
1597 r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
1599 case 0x1b: /* ESC */
1600 r = parser_transition(parser, raw, STATE_ESC, ACTION_CLEAR);
1602 case 0x98: /* SOS */
1604 case 0x9f: /* APC */
1605 r = parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
1607 case 0x90: /* DCS */
1608 r = parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
1610 case 0x9d: /* OSC */
1611 r = parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
1613 case 0x9b: /* CSI */
1614 r = parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
1617 r = parser_feed_to_state(parser, raw);
1624 *seq_out = &parser->seq;