Changeset 2582 for neercs


Ignore:
Timestamp:
Jul 23, 2008, 11:55:38 PM (12 years ago)
Author:
bsittler
Message:

improved terminal emulation (partial ISO-2022 support)

Location:
neercs/trunk/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • neercs/trunk/src/neercs.h

    r2511 r2582  
    3737
    3838
     39
     40/* ISO-2022 Conversion State */
     41struct iso2022_conv_state
     42{
     43    /* cs = coding system/coding method: */
     44    /* (with standard return) */
     45    /* '@' = ISO-2022, */
     46    /* 'G' = UTF-8 without implementation level, */
     47    /* '8' = UTF-8 (Linux console and imitators), */
     48    /* and many others that are rarely used; */
     49    /* (without standard return) */
     50    /* '/G' = UTF-8 Level 1, */
     51    /* '/H' = UTF-8 Level 2, */
     52    /* '/I' = UTF-8 Level 3, */
     53    /* and many others that are rarely used */
     54    uint32_t cs;
     55    /* ctrl8bit = allow 8-bit controls */
     56    uint8_t ctrl8bit;
     57    /* cn[0] = C0 control charset (0x00 ... 0x1f):
     58     * '@' = ISO 646,
     59     * '~' = empty,
     60     * and many others that are rarely used */
     61    /* cn[1] = C1 control charset (0x80 ... 0x9f):
     62     * 'C' = ISO 6429-1983,
     63     * '~' = empty,
     64     * and many others that are rarely used */
     65    uint32_t cn[2];
     66    /* glr[0] = GL graphic charset (94-char. 0x21 ... 0x7e,
     67     *                              94x94-char. 0x21/0x21 ... 0x7e/0x7e),
     68     * and
     69     * glr[1] = GR graphic charset (94-char. 0xa1 ... 0xfe,
     70     *                              96-char. 0xa0 ... 0xff,
     71     *                              94x94-char. 0xa1/0xa1 ... 0xfe/0xfe,
     72     *                              96x96-char. 0xa0/0xa0 ... 0xff/0xff):
     73     * 0 = G0, 1 = G1, 2 = G2, 3 = G3 */
     74    uint8_t glr[2];
     75    /* gn[i] = G0/G1/G2/G3 graphic charset state:
     76     * (94-char. sets)
     77     * '0' = DEC ACS (VT100 and imitators),
     78     * 'B' = US-ASCII,
     79     * and many others that are rarely used for e.g. various national ASCII variations;
     80     * (96-char. sets)
     81     * '.A' = ISO 8859-1 "Latin 1" GR,
     82     * '.~' = empty 96-char. set,
     83     * and many others that are rarely used for e.g. ISO 8859-n GR;
     84     * (double-byte 94x94-charsets)
     85     * '$@' = Japanese Character Set ("old JIS") (JIS C 6226:1978),
     86     * '$A' = Chinese Character Set (GB 2312),
     87     * '$B' = Japanese Character Set (JIS X0208/JIS C 6226:1983),
     88     * '$C' = Korean Graphic Character Set (KSC 5601:1987),
     89     * '$D' = Supplementary Japanese Graphic Character Set (JIS X0212),
     90     * '$E' = CCITT Chinese Set (GB 2312 + GB 8565),
     91     * '$G' = CNS 11643 plane 1,
     92     * '$H' = CNS 11643 plane 2,
     93     * '$I' = CNS 11643 plane 3,
     94     * '$J' = CNS 11643 plane 4,
     95     * '$K' = CNS 11643 plane 5,
     96     * '$L' = CNS 11643 plane 6,
     97     * '$M' = CNS 11643 plane 7,
     98     * '$O' = JIS X 0213 plane 1,
     99     * '$P' = JIS X 0213 plane 2,
     100     * '$Q' = JIS X 0213-2004 Plane 1,
     101     * and many others that are rarely used for e.g. traditional
     102     * ideographic Vietnamese and BlissSymbolics;
     103     * (double-byte 96x96-charsets)
     104     * none standardized or in use on terminals AFAIK (Mule does use
     105     * some internally)
     106     */
     107    uint32_t gn[4];
     108    /* ss = single-shift state: 0 = GL, 2 = G2, 3 = G3 */
     109    uint8_t ss;
     110};
     111
    39112struct screen
    40113{
     
    47120    uint8_t bold, blink, italics, negative, concealed, underline;
    48121    uint8_t faint, strike, proportional; /* unsupported */
     122    struct iso2022_conv_state conv_state; /* charset mess */
    49123
    50124    /* Other stuff */
     
    63137    int orig_x, orig_y;          /* Used by recurrents */
    64138    int orig_w, orig_h;          /* Used by recurrents */
    65 
    66139};
    67140
  • neercs/trunk/src/term.c

    r2507 r2582  
    3232#include "neercs.h"
    3333
     34/* DEC ACS with common extensions */
     35static uint32_t dec_acs(uint32_t uc)
     36{
     37    switch (uc)
     38    {
     39    case '+': return 0x2192; /* RIGHTWARDS ARROW */
     40    case ',': return 0x2190; /* LEFTWARDS ARROW */
     41    case '-': return 0x2191; /* UPWARDS ARROW */
     42    case '.': return 0x2193; /* DOWNWARDS ARROW */
     43    case '0': return 0x25AE; /* BLACK VERTICAL RECTANGLE */
     44    case '_': return 0x25AE; /* BLACK VERTICAL RECTANGLE */
     45    case '`': return 0x25C6; /* BLACK DIAMOND */
     46    case 'a': return 0x2592; /* MEDIUM SHADE */
     47    case 'b': return 0x2409; /* SYMBOL FOR HORIZONTAL TABULATION */
     48    case 'c': return 0x240C; /* SYMBOL FOR FORM FEED */
     49    case 'd': return 0x240D; /* SYMBOL FOR CARRIAGE RETURN */
     50    case 'e': return 0x240A; /* SYMBOL FOR LINE FEED */
     51    case 'f': return 0x00B0; /* DEGREE SIGN */
     52    case 'g': return 0x00B1; /* PLUS-MINUS SIGN */
     53    case 'h': return 0x2424; /* SYMBOL FOR NEWLINE */
     54    case 'i': return 0x240B; /* SYMBOL FOR VERTICAL TABULATION */
     55    case 'j': return 0x2518; /* BOX DRAWINGS LIGHT UP AND LEFT */
     56    case 'k': return 0x2510; /* BOX DRAWINGS LIGHT DOWN AND LEFT */
     57    case 'l': return 0x250C; /* BOX DRAWINGS LIGHT DOWN AND RIGHT */
     58    case 'm': return 0x2514; /* BOX DRAWINGS LIGHT UP AND RIGHT */
     59    case 'n': return 0x253C; /* BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL */
     60    case 'o': return 0x23BA; /* HORIZONTAL SCAN LINE-1 */
     61    case 'p': return 0x23BB; /* HORIZONTAL SCAN LINE-3 */
     62    case 'q': return 0x2500; /* BOX DRAWINGS LIGHT HORIZONTAL */
     63    case 'r': return 0x23BC; /* HORIZONTAL SCAN LINE-7 */
     64    case 's': return 0x23BD; /* HORIZONTAL SCAN LINE-9 */
     65    case 't': return 0x251C; /* BOX DRAWINGS LIGHT VERTICAL AND RIGHT */
     66    case 'u': return 0x2524; /* BOX DRAWINGS LIGHT VERTICAL AND LEFT */
     67    case 'v': return 0x2534; /* BOX DRAWINGS LIGHT UP AND HORIZONTAL */
     68    case 'w': return 0x252C; /* BOX DRAWINGS LIGHT DOWN AND HORIZONTAL */
     69    case 'x': return 0x2502; /* BOX DRAWINGS LIGHT VERTICAL */
     70    case 'y': return 0x2264; /* LESS-THAN OR EQUAL TO */
     71    case 'z': return 0x2265; /* GREATER-THAN OR EQUAL TO */
     72    case '{': return 0x03C0; /* GREEK SMALL LETTER PI */
     73    case '|': return 0x2260; /* NOT EQUAL TO */
     74    case '}': return 0x00A3; /* POUND SIGN */
     75    case '~': return 0x00B7; /* MIDDLE DOT */
     76    default:
     77        return uc;
     78    }
     79};
     80
     81static void reset_conv_state(struct screen *);
     82
     83#define LITERAL2CHAR(i0,i1) (((i0) << 8) | (i1))
     84
     85#define LITERAL3CHAR(i0,i1,i2) LITERAL2CHAR(LITERAL2CHAR(i0, i1), i2)
     86
    3487static void ansi_parse_grcm(struct screen *,
    3588                            unsigned int, unsigned int const *);
     
    58111        ansi_parse_grcm(sc, 1, &dummy);
    59112
     113        reset_conv_state(sc);
     114
    60115        sc->init = 1;
    61116    }
     
    93148            if(x > 0)
    94149                x--;
     150        }
     151
     152        else if(buffer[i] == '\x0e')
     153        {
     154            /* Shift Out (Ctrl-N) -> Switch to
     155             * Alternate Character Set: invokes
     156             * the G1 character set. */
     157            sc->conv_state.glr[0] = 1;
     158        }
     159
     160        else if(buffer[i] == '\x0f')
     161        {
     162            /* Shift In (Ctrl-O) -> Switch to
     163             * Standard Character Set: invokes
     164             * the G0 character set. */
     165            sc->conv_state.glr[0] = 0;
    95166        }
    96167
     
    100171            break;
    101172
    102         /* XXX: What the fuck is this shit? */
    103         else if(buffer[i] == '\x1b' && buffer[i + 1] == '('
    104                  && buffer[i + 2] == 'B')
    105         {
     173        /* Single Shift Select of G2 Character Set (SS2: 0x8e):
     174         * affects next character only */
     175        else if(buffer[i] == '\x1b' && buffer[i + 1] == 'N')
     176        {
     177            sc->conv_state.ss = 2;
     178            skip += 1;
     179        }
     180
     181        /* Single Shift Select of G3 Character Set (SS3: 0x8f):
     182         * affects next character only */
     183        else if(buffer[i] == '\x1b' && buffer[i + 1] == 'O')
     184        {
     185            sc->conv_state.ss = 3;
     186            skip += 1;
     187        }
     188
     189        /* LOCKING-SHIFT TWO (LS2), ISO 2022, ECMA-48 (1986), ISO 6429 : 1988 */
     190        else if(buffer[i] == '\x1b' && buffer[i + 1] == 'n')
     191        {
     192            sc->conv_state.glr[0] = 2;
     193            skip += 1;
     194        }
     195
     196        /* LOCKING-SHIFT THREE (LS3) ISO 2022, ECMA-48 (1986), ISO 6429 : 1988 */
     197        else if(buffer[i] == '\x1b' && buffer[i + 1] == 'o')
     198        {
     199            sc->conv_state.glr[0] = 3;
     200            skip += 1;
     201        }
     202
     203        /* RESET TO INITIAL STATE (RIS), ECMA-48 (1986), ISO 6429 : 1988 */
     204        else if(buffer[i] == '\x1b' && buffer[i + 1] == 'c')
     205        {
     206            sc->dfg = CUCUL_DEFAULT;
     207            sc->dbg = CUCUL_DEFAULT;
     208
     209            cucul_set_color_ansi(sc->cv, sc->dfg, sc->dbg);
     210            sc->clearattr = cucul_get_attr(sc->cv, -1, -1);
     211            ansi_parse_grcm(sc, 1, &dummy);
     212
     213            reset_conv_state(sc);
     214            skip += 1;
     215        }
     216
     217        /* Coding Method Delimiter (CMD), ECMA-48 (1991), ISO/IEC 6429:1992 (ISO IR 189) */
     218        else if(buffer[i] == '\x1b' && buffer[i + 1] == 'd')
     219        {
     220            reset_conv_state(sc);
     221            skip += 1;
     222        }
     223
     224        /* GZDM4, G0-Designators, multi, 94^n chars [grandfathered short form from ISO 2022:1986] */
     225        else if(buffer[i] == '\x1b' && buffer[i + 1] == '$' && (buffer[i + 2] >= '@') && (buffer[i + 2] <= 'C'))
     226        {
     227            sc->conv_state.gn[0] = LITERAL2CHAR('$', buffer[i + 2]);
    106228            skip += 2;
     229        }
     230
     231        /* GnDMx Gn-Designators, 9x^n chars; need one more char to distinguish these */
     232        else if(buffer[i] == '\x1b' && buffer[i + 1] == '$' && (i + 3 >= size))
     233            break;
     234
     235        /* GZD4 G0-Designator, 94 chars */
     236        else if(buffer[i] == '\x1b' && buffer[i + 1] == '(')
     237        {
     238            sc->conv_state.gn[0] = buffer[i + 2];
     239            skip += 2;
     240        }
     241
     242        /* G1D4 G1-Designator, 94 chars */
     243        else if(buffer[i] == '\x1b' && buffer[i + 1] == ')')
     244        {
     245            sc->conv_state.gn[1] = buffer[i + 2];
     246            skip += 2;
     247        }
     248
     249        /* G2D4 G2-Designator, 94 chars */
     250        else if(buffer[i] == '\x1b' && buffer[i + 1] == '*')
     251        {
     252            sc->conv_state.gn[2] = buffer[i + 2];
     253            skip += 2;
     254        }
     255
     256        /* G3D4 G3-Designator, 94 chars */
     257        else if(buffer[i] == '\x1b' && buffer[i + 1] == '+')
     258        {
     259            sc->conv_state.gn[3] = buffer[i + 2];
     260            skip += 2;
     261        }
     262
     263        /* G2D6 G2-Designator, 96 chars */
     264        else if(buffer[i] == '\x1b' && buffer[i + 1] == '.')
     265        {
     266            sc->conv_state.gn[2] = LITERAL2CHAR('.', buffer[i + 2]);
     267            skip += 2;
     268        }
     269
     270        /* G3D6 G3-Designator, 96 chars */
     271        else if(buffer[i] == '\x1b' && buffer[i + 1] == '/')
     272        {
     273            sc->conv_state.gn[3] = LITERAL2CHAR('.', buffer[i + 2]);
     274            skip += 2;
     275        }
     276
     277        /* GZDM4 G0-Designator, 94^n chars */
     278        else if(buffer[i] == '\x1b' && buffer[i + 1] == '$' && buffer[i + 2] == '(')
     279        {
     280            sc->conv_state.gn[0] = LITERAL2CHAR('$', buffer[i + 3]);
     281            skip += 3;
     282        }
     283
     284        /* G1DM4 G1-Designator, 94^n chars */
     285        else if(buffer[i] == '\x1b' && buffer[i + 1] == '$' && buffer[i + 2] == ')')
     286        {
     287            sc->conv_state.gn[1] = LITERAL2CHAR('$', buffer[i + 3]);
     288            skip += 3;
     289        }
     290
     291        /* G2DM4 G2-Designator, 94^n chars */
     292        else if(buffer[i] == '\x1b' && buffer[i + 1] == '$' && buffer[i + 2] == '*')
     293        {
     294            sc->conv_state.gn[2] = LITERAL2CHAR('$', buffer[i + 3]);
     295            skip += 3;
     296        }
     297
     298        /* G3DM4 G3-Designator, 94^n chars */
     299        else if(buffer[i] == '\x1b' && buffer[i + 1] == '$' && buffer[i + 2] == '+')
     300        {
     301            sc->conv_state.gn[3] = LITERAL2CHAR('$', buffer[i + 3]);
     302            skip += 3;
     303        }
     304
     305        /* G2DM6 G2-Designator, 96^n chars */
     306        else if(buffer[i] == '\x1b' && buffer[i + 1] == '$' && buffer[i + 2] == '.')
     307        {
     308            sc->conv_state.gn[2] = LITERAL3CHAR('$', '.', buffer[i + 3]);
     309            skip += 3;
     310        }
     311
     312        /* G3DM6 G3-Designator, 96^n chars */
     313        else if(buffer[i] == '\x1b' && buffer[i + 1] == '$' && buffer[i + 2] == '/')
     314        {
     315            sc->conv_state.gn[3] = LITERAL3CHAR('$', '.', buffer[i + 3]);
     316            skip += 3;
    107317        }
    108318
     
    366576                bytes = 1;
    367577            }
     578
     579            /* very incomplete ISO-2022 implementation tailored to DEC ACS */
     580            if(sc->conv_state.cs == '@')
     581            {
     582                if (((ch > ' ') && (ch <= '~'))
     583                    &&
     584                    (sc->conv_state.gn[sc->conv_state.ss ? sc->conv_state.gn[sc->conv_state.ss] : sc->conv_state.glr[0]] == '0'))
     585                {
     586                    ch = dec_acs(ch);
     587                }
     588                else if (((ch > 0x80) && (ch < 0xff))
     589                         &&
     590                         (sc->conv_state.gn[sc->conv_state.glr[1]] == '0'))
     591                {
     592                    ch = dec_acs(ch + ' ' - 0x80);
     593                }
     594            }
     595            sc->conv_state.ss = 0; /* no single-shift (GL) */
     596
    368597            wch = cucul_utf32_is_fullwidth(ch) ? 2 : 1;
    369598            skip += bytes - 1;
     
    410639
    411640    return i;
     641}
     642
     643/* Coding Method Delimiter (CMD), ECMA-48 (1991), ISO/IEC 6429:1992 (ISO IR 189) */
     644
     645static void reset_conv_state(struct screen *sc)
     646{
     647    sc->conv_state.cs = '@'; /* ISO-2022 coding system */
     648    sc->conv_state.cn[0] = '@'; /* ISO 646 C0 control charset */
     649    sc->conv_state.cn[1] = 'C'; /* ISO 6429-1983 C1 control charset */
     650    sc->conv_state.glr[0] = 0; /* G0 in GL */
     651    sc->conv_state.glr[1] = 2; /* G2 in GR */
     652    sc->conv_state.gn[0] = 'B'; /* US-ASCII G0 charset */
     653    sc->conv_state.gn[1] = '0'; /* DEC ACS G1 charset */
     654    sc->conv_state.gn[2] = LITERAL2CHAR('.', 'A'); /* ISO 8859-1 G2 charset */
     655    sc->conv_state.gn[3] = LITERAL2CHAR('.', 'A'); /* ISO 8859-1 G3 charset */
     656    sc->conv_state.ss = 0; /* no single-shift (GL) */
     657    sc->conv_state.ctrl8bit = 1;
    412658}
    413659
Note: See TracChangeset for help on using the changeset viewer.