source: libcaca/trunk/cucul/import.c @ 1378

Last change on this file since 1378 was 1378, checked in by Sam Hocevar, 16 years ago
  • When importing an utf-8 file, set background and foreground colours to the nearest ANSI values for the current attribute. When importing an ANSI file, force them to lightgray on black.
  • Property svn:keywords set to Id
File size: 19.5 KB
Line 
1/*
2 *  libcucul      Canvas for ultrafast compositing of Unicode letters
3 *  Copyright (c) 2002-2006 Sam Hocevar <sam@zoy.org>
4 *                All Rights Reserved
5 *
6 *  $Id: import.c 1378 2006-11-12 21:29:31Z sam $
7 *
8 *  This library is free software; you can redistribute it and/or
9 *  modify it under the terms of the Do What The Fuck You Want To
10 *  Public License, Version 2, as published by Sam Hocevar. See
11 *  http://sam.zoy.org/wtfpl/COPYING for more details.
12 */
13
14/*
15 *  This file contains various import functions.
16 */
17
18#include "config.h"
19#include "common.h"
20
21#if !defined __KERNEL__
22#   include <stdio.h>
23#   include <stdlib.h>
24#   include <string.h>
25#endif
26
27#include "cucul.h"
28#include "cucul_internals.h"
29
/* Copy four bytes from a possibly unaligned address into a uint32_t and
 * return the result of hton32() on it (byte-order conversion helper that
 * is defined outside this file). */
static inline uint32_t sscanu32(void const *s)
{
    uint32_t raw;

    memcpy(&raw, s, sizeof(raw));

    return hton32(raw);
}
36
/* Copy two bytes from a possibly unaligned address into a uint16_t and
 * return the result of hton16() on it (byte-order conversion helper that
 * is defined outside this file). */
static inline uint16_t sscanu16(void const *s)
{
    uint16_t raw;

    memcpy(&raw, s, sizeof(raw));

    return hton16(raw);
}
43
/* ANSI Graphic Rendition Combination Mode: the colour state tracked while
 * interpreting SGR sequences in the ANSI importer. */
struct ansi_grcm
{
    uint8_t fg;        /* ANSI-context foreground */
    uint8_t bg;        /* ANSI-context background */
    uint8_t efg;       /* Effective (libcucul) foreground */
    uint8_t ebg;       /* Effective (libcucul) background */
    uint8_t dfg;       /* Default foreground */
    uint8_t dbg;       /* Default background */
    uint8_t bold;      /* Non-zero while "bold" (SGR 1) is active */
    uint8_t negative;  /* Non-zero while "negative image" (SGR 7) is active */
    uint8_t concealed; /* Non-zero while "concealed" (SGR 8) is active */
};
52
53static long int import_caca(cucul_canvas_t *, void const *, unsigned int);
54static long int import_text(cucul_canvas_t *, void const *, unsigned int);
55static long int import_ansi(cucul_canvas_t *, void const *, unsigned int, int);
56
57static void ansi_parse_grcm(cucul_canvas_t *, struct ansi_grcm *,
58                            unsigned int, unsigned int const *);
59
60/** \brief Import a memory buffer into a canvas
61 *
62 *  Import a memory buffer into the given libcucul canvas's current
63 *  frame. The current frame is resized accordingly and its contents are
64 *  replaced with the imported data.
65 *
66 *  Valid values for \c format are:
67 *  - \c "": attempt to autodetect the file format.
68 *  - \c "caca": import native libcaca files.
69 *  - \c "text": import ASCII text files.
70 *  - \c "ansi": import ANSI files.
71 *  - \c "utf8": import UTF-8 files with ANSI colour codes.
72 *
73 *  The number of bytes read is returned. If the file format is valid, but
74 *  not enough data was available, 0 is returned.
75 *
76 *  If an error occurs, -1 is returned and \b errno is set accordingly:
77 *  - \c ENOMEM Not enough memory to allocate canvas.
78 *  - \c EINVAL Invalid format requested.
79 *
80 *  \param cv A libcucul canvas in which to import the file.
81 *  \param data A memory area containing the data to be loaded into the canvas.
82 *  \param len The size in bytes of the memory area.
83 *  \param format A string describing the input format.
84 *  \return The number of bytes read, or 0 if there was not enough data,
85 *  or -1 if an error occurred.
86 */
87long int cucul_import_memory(cucul_canvas_t *cv, void const *data,
88                             unsigned long int len, char const *format)
89{
90    if(!strcasecmp("caca", format))
91        return import_caca(cv, data, len);
92    if(!strcasecmp("utf8", format))
93        return import_ansi(cv, data, len, 1);
94    if(!strcasecmp("text", format))
95        return import_text(cv, data, len);
96    if(!strcasecmp("ansi", format))
97        return import_ansi(cv, data, len, 0);
98
99    /* Autodetection */
100    if(!strcasecmp("", format))
101    {
102        unsigned char const *str = data;
103        unsigned int i;
104
105        /* If 4 first bytes are 0xcaca + 'CV' */
106        if(len >= 4 && str[0] == 0xca &&
107           str[1] == 0xca && str[2] == 'C' && str[3] == 'V')
108            return import_caca(cv, data, len);
109
110        /* If we find ESC[ argv, we guess it's an ANSI file */
111        for(i = 0; i + 1 < len; i++)
112            if((str[i] == 0x1b) && (str[i + 1] == '['))
113                return import_ansi(cv, data, len, 0);
114
115        /* Otherwise, import it as text */
116        return import_text(cv, data, len);
117    }
118
119    seterrno(EINVAL);
120    return -1;
121}
122
123/** \brief Import a file into a canvas
124 *
125 *  Import a file into the given libcucul canvas's current frame. The
126 *  current frame is resized accordingly and its contents are replaced
127 *  with the imported data.
128 *
129 *  Valid values for \c format are:
130 *  - \c "": attempt to autodetect the file format.
131 *  - \c "caca": import native libcaca files.
132 *  - \c "text": import ASCII text files.
133 *  - \c "ansi": import ANSI files.
134 *  - \c "utf8": import UTF-8 files with ANSI colour codes.
135 *
136 *  The number of bytes read is returned. If the file format is valid, but
137 *  not enough data was available, 0 is returned.
138 *
139 *  If an error occurs, -1 is returned and \b errno is set accordingly:
140 *  - \c ENOSYS File access is not implemented on this system.
141 *  - \c ENOMEM Not enough memory to allocate canvas.
142 *  - \c EINVAL Invalid format requested.
143 *  cucul_import_file() may also fail and set \b errno for any of the
144 *  errors specified for the routine fopen().
145 *
146 *  \param cv A libcucul canvas in which to import the file.
147 *  \param filename The name of the file to load.
148 *  \param format A string describing the input format.
149 *  \return The number of bytes read, or 0 if there was not enough data,
150 *  or -1 if an error occurred.
151 */
152long int cucul_import_file(cucul_canvas_t *cv, char const *filename,
153                           char const *format)
154{
155#if defined __KERNEL__
156    seterrno(ENOSYS);
157    return -1;
158#else
159    FILE *fp;
160    void *data;
161    long int size;
162    int ret;
163
164    fp = fopen(filename, "rb");
165    if(!fp)
166        return -1; /* fopen already set errno */
167
168    fseek(fp, 0, SEEK_END);
169    size = ftell(fp);
170
171    data = malloc(size);
172    if(!data)
173    {
174        fclose(fp);
175        seterrno(ENOMEM);
176        return -1;
177    }
178
179    fseek(fp, 0, SEEK_SET);
180    fread(data, size, 1, fp);
181    fclose(fp);
182
183    ret = cucul_import_memory(cv, data, size, format);
184    free(data);
185
186    return ret;
187#endif
188}
189   
/** \brief Get available import formats
 *
 *  Return the table of import formats known to this file. The table is a
 *  flat array of string pairs: first the internal format identifier, as
 *  accepted by the \c format argument of cucul_import_memory() and
 *  cucul_import_file(), then a natural language description of that
 *  format. A pair of NULL pointers terminates the table.
 *
 *  The table has static storage; callers must not modify or free it.
 *
 *  This function never fails.
 *
 *  \return An array of strings.
 */
char const * const * cucul_get_import_list(void)
{
    /* identifier, description */
    static char const * const formats[] =
    {
        "",     "autodetect",
        "caca", "native libcaca format",
        "text", "plain text",
        "ansi", "ANSI coloured text",
        "utf8", "UTF-8 files with ANSI colour codes",
        NULL,   NULL
    };

    return &formats[0];
}
215
216/*
217 * XXX: the following functions are local.
218 */
219
220static long int import_caca(cucul_canvas_t *cv,
221                            void const *data, unsigned int size)
222{
223    uint8_t const *buf = (uint8_t const *)data;
224    unsigned int control_size, data_size, expected_size, frames, f, n;
225    uint16_t version, flags;
226
227    if(size < 20)
228        return 0;
229
230    if(buf[0] != 0xca || buf[1] != 0xca || buf[2] != 'C' || buf[3] != 'V')
231    {
232        debug("caca import error: expected \\xca\\xcaCV header");
233        goto invalid_caca;
234    }
235
236    control_size = sscanu32(buf + 4);
237    data_size = sscanu32(buf + 8);
238    version = sscanu16(buf + 12);
239    frames = sscanu32(buf + 14);
240    flags = sscanu16(buf + 18);
241
242    if(size < 4 + control_size + data_size)
243        return 0;
244
245    if(control_size < 16 + frames * 32)
246    {
247        debug("caca import error: control size %lu < expected %lu",
248              (unsigned long int)control_size, 16 + frames * 32);
249        goto invalid_caca;
250    }
251
252    for(expected_size = 0, f = 0; f < frames; f++)
253    {
254        unsigned int width, height, duration;
255        uint32_t attr;
256        int x, y, handlex, handley;
257
258        width = sscanu32(buf + 4 + 16 + f * 24);
259        height = sscanu32(buf + 4 + 16 + f * 24 + 4);
260        duration = sscanu32(buf + 4 + 16 + f * 24 + 8);
261        attr = sscanu32(buf + 4 + 16 + f * 24 + 12);
262        x = (int32_t)sscanu32(buf + 4 + 16 + f * 24 + 16);
263        y = (int32_t)sscanu32(buf + 4 + 16 + f * 24 + 20);
264        handlex = (int32_t)sscanu32(buf + 4 + 16 + f * 24 + 24);
265        handley = (int32_t)sscanu32(buf + 4 + 16 + f * 24 + 28);
266
267        expected_size += width * height * 8;
268    }
269
270    if(expected_size != data_size)
271    {
272        debug("caca import error: data size %lu < expected %lu",
273              (unsigned long int)data_size, (unsigned long int)expected_size);
274        goto invalid_caca;
275    }
276
277    /* FIXME: read all frames, not only the first one */
278    cucul_set_canvas_size(cv, 0, 0);
279    cucul_set_canvas_size(cv, sscanu32(buf + 4 + 16),
280                              sscanu32(buf + 4 + 16 + 4));
281
282    /* FIXME: check for return value */
283
284    for(n = sscanu32(buf + 4 + 16) * sscanu32(buf + 4 + 16 + 4); n--; )
285    {
286        cv->chars[n] = sscanu32(buf + 4 + control_size + 8 * n);
287        cv->attrs[n] = sscanu32(buf + 4 + control_size + 8 * n + 4);
288    }
289
290    cv->curattr = sscanu32(buf + 4 + 16 + 12);
291    cv->frames[0].x = (int32_t)sscanu32(buf + 4 + 16 + 0 * 24 + 16);
292    cv->frames[0].y = (int32_t)sscanu32(buf + 4 + 16 + 0 * 24 + 20);
293    cv->frames[0].handlex = (int32_t)sscanu32(buf + 4 + 16 + 0 * 24 + 24);
294    cv->frames[0].handley = (int32_t)sscanu32(buf + 4 + 16 + 0 * 24 + 28);
295
296    return 4 + control_size + data_size;
297
298invalid_caca:
299    seterrno(EINVAL);
300    return -1;
301}
302
303static long int import_text(cucul_canvas_t *cv,
304                            void const *data, unsigned int size)
305{
306    char const *text = (char const *)data;
307    unsigned int width = 0, height = 0, x = 0, y = 0, i;
308
309    cucul_set_canvas_size(cv, width, height);
310
311    for(i = 0; i < size; i++)
312    {
313        unsigned char ch = *text++;
314
315        if(ch == '\r')
316            continue;
317
318        if(ch == '\n')
319        {
320            x = 0;
321            y++;
322            continue;
323        }
324
325        if(x >= width || y >= height)
326        {
327            if(x >= width)
328                width = x + 1;
329
330            if(y >= height)
331                height = y + 1;
332
333            cucul_set_canvas_size(cv, width, height);
334        }
335
336        cucul_put_char(cv, x, y, ch);
337        x++;
338    }
339
340    if(y > height)
341        cucul_set_canvas_size(cv, width, height = y);
342
343    return size;
344}
345
346static long int import_ansi(cucul_canvas_t *cv,
347                            void const *data, unsigned int size, int utf8)
348{
349    struct ansi_grcm grcm;
350    unsigned char const *buffer = (unsigned char const*)data;
351    unsigned int i, j, skip, dummy = 0;
352    unsigned int width = 0, height = 0, wch = 1;
353    unsigned long int ch;
354    int x = 0, y = 0, save_x = 0, save_y = 0;
355
356    cucul_set_canvas_size(cv, width, height);
357    if(utf8)
358    {
359        grcm.dfg = cucul_attr_to_ansi_fg(cv->curattr);
360        grcm.dbg = cucul_attr_to_ansi_bg(cv->curattr);
361    }
362    else
363    {
364        grcm.dfg = CUCUL_LIGHTGRAY;
365        grcm.dbg = CUCUL_BLACK;
366        cucul_set_color_ansi(cv, grcm.dfg, grcm.dbg);
367    }
368
369    ansi_parse_grcm(cv, &grcm, 1, &dummy);
370
371    for(i = 0; i < size; i += skip)
372    {
373        skip = 1;
374
375        /* Wrap long lines */
376        if((unsigned int)x >= 80)
377        {
378            x = 0;
379            y++;
380        }
381
382        if(buffer[i] == '\x1a' && size - i >= 8
383           && !memcmp(buffer + i + 1, "SAUCE00", 7))
384            break; /* End before SAUCE data */
385
386        if(buffer[i] == '\r')
387            continue; /* DOS sucks */
388
389        if(buffer[i] == '\n')
390        {
391            x = 0;
392            y++;
393            continue;
394        }
395
396        /* Interpret escape commands, as per Standard ECMA-48 "Control
397         * Functions for Coded Character Sets", 5.4. Control sequences. */
398        if(buffer[i] == '\x1b' && buffer[i + 1] == '[')
399        {
400            unsigned int argc = 0, argv[101];
401            unsigned int param, inter, final;
402
403        /* Compute offsets to parameter bytes, intermediate bytes and
404         * to the final byte. Only the final byte is mandatory, there
405         * can be zero of the others.
406         * 0  param=2             inter                 final           final+1
407         * +-----+------------------+---------------------+-----------------+
408         * | CSI | parameter bytes  | intermediate bytes  |   final byte    |
409         * |     |   0x30 - 0x3f    |    0x20 - 0x2f      |   0x40 - 0x7e   |
410         * | ^[[ | 0123456789:;<=>? | SPC !"#$%&'()*+,-./ | azAZ@[\]^_`{|}~ |
411         * +-----+------------------+---------------------+-----------------+
412         */
413            param = 2;
414
415            for(inter = param; i + inter < size; inter++)
416                if(buffer[i + inter] < 0x30 || buffer[i + inter] > 0x3f)
417                    break;
418
419            for(final = inter; i + final < size; final++)
420                if(buffer[i + final] < 0x20 || buffer[i + final] > 0x2f)
421                    break;
422
423            if(buffer[i + final] < 0x40 || buffer[i + final] > 0x7e)
424                break; /* Invalid Final Byte */
425
426            skip += final;
427
428            /* Sanity checks */
429            if(param < inter && buffer[i + param] >= 0x3c)
430            {
431                fprintf(stderr, "private sequence \"^[[%.*s\"\n",
432                        final - param + 1, buffer + i + param);
433                continue; /* Private sequence, skip it entirely */
434            }
435
436            if(final - param > 100)
437                continue; /* Suspiciously long sequence, skip it */
438
439            /* Parse parameter bytes as per ECMA-48 5.4.2: Parameter string
440             * format */
441            if(param < inter)
442            {
443                argv[0] = 0;
444                for(j = param; j < inter; j++)
445                {
446                    if(buffer[i + j] == ';')
447                        argv[++argc] = 0;
448                    else if(buffer[i + j] >= '0' && buffer[i + j] <= '9')
449                        argv[argc] = 10 * argv[argc] + (buffer[i + j] - '0');
450                }
451                argc++;
452            }
453
454            /* Interpret final byte. The code representations are given in
455             * ECMA-48 5.4: Control sequences, and the code definitions are
456             * given in ECMA-48 8.3: Definition of control functions. */
457            switch(buffer[i + final])
458            {
459            case 'f': /* CUP - Cursor Position */
460            case 'H': /* HVP - Character And Line Position */
461                x = (argc > 1 && argv[1] > 0) ? argv[1] - 1 : 0;
462                y = (argc > 0 && argv[0] > 0) ? argv[0] - 1 : 0;
463                break;
464            case 'A': /* CUU - Cursor Up */
465                y -= argc ? argv[0] : 1;
466                if(y < 0)
467                    y = 0;
468                break;
469            case 'B': /* CUD - Cursor Down */
470                y += argc ? argv[0] : 1;
471                break;
472            case 'C': /* CUF - Cursor Right */
473                x += argc ? argv[0] : 1;
474                break;
475            case 'D': /* CUB - Cursor Left */
476                x -= argc ? argv[0] : 1;
477                if(x < 0)
478                    x = 0;
479                break;
480            case 's': /* Private (save cursor position) */
481                save_x = x;
482                save_y = y;
483                break;
484            case 'u': /* Private (reload cursor position) */
485                x = save_x;
486                y = save_y;
487                break;
488            case 'J': /* ED - Erase In Page */
489                if(argv[0] == 2)
490                    x = y = 0;
491                break;
492            case 'K': /* EL - Erase In Line */
493                if(width < 80)
494                    cucul_set_color_ansi(cv, grcm.dfg, grcm.dbg);
495                    cucul_set_canvas_size(cv, width = 80, height);
496                for(j = x; j < 80; j++)
497                    cucul_put_char(cv, j, y, ' ');
498                x = 80;
499                break;
500            case 'm': /* SGR - Select Graphic Rendition */
501                ansi_parse_grcm(cv, &grcm, argc, argv);
502                break;
503            default:
504                fprintf(stderr, "unknown command %c\n", buffer[i + final]);
505                break;
506            }
507
508            continue;
509        }
510
511        /* Get the character we’re going to paste */
512        if(utf8)
513        {
514            unsigned int bytes;
515
516            if(i + 6 < size)
517                ch = cucul_utf8_to_utf32((char const *)(buffer + i), &bytes);
518            else
519            {
520                /* Add a trailing zero to what we're going to read */
521                char tmp[7];
522                memcpy(tmp, buffer + i, size - i);
523                tmp[size - i] = '\0';
524                ch = cucul_utf8_to_utf32(tmp, &bytes);
525            }
526
527            if(!bytes)
528            {
529                /* If the Unicode is invalid, assume it was latin1. */
530                ch = buffer[i];
531                bytes = 1;
532            }
533            wch = cucul_utf32_is_fullwidth(ch) ? 2 : 1;
534            skip += bytes - 1;
535        }
536        else
537        {
538            ch = cucul_cp437_to_utf32(buffer[i]);
539        }
540
541        /* Make sure the canvas is big enough. */
542        if((unsigned int)x + wch > width)
543        {
544            cucul_set_color_ansi(cv, grcm.dfg, grcm.dbg);
545            cucul_set_canvas_size(cv, width = x + wch, height);
546        }
547
548        if((unsigned int)y >= height)
549        {
550            cucul_set_color_ansi(cv, grcm.dfg, grcm.dbg);
551            cucul_set_canvas_size(cv, width, height = y + 1);
552        }
553
554        /* Now paste our character */
555        cucul_set_color_ansi(cv, grcm.efg, grcm.ebg);
556        cucul_put_char(cv, x, y, ch);
557        x += wch;
558    }
559
560    if((unsigned int)y > height)
561    {
562        cucul_set_color_ansi(cv, grcm.dfg, grcm.dbg);
563        cucul_set_canvas_size(cv, width, height = y);
564    }
565
566    return size;
567}
568
569/* XXX : ANSI loader helper */
570
571static void ansi_parse_grcm(cucul_canvas_t *cv, struct ansi_grcm *g,
572                            unsigned int argc, unsigned int const *argv)
573{
574    static uint8_t const ansi2cucul[] =
575    {
576        CUCUL_BLACK, CUCUL_RED, CUCUL_GREEN, CUCUL_BROWN,
577        CUCUL_BLUE, CUCUL_MAGENTA, CUCUL_CYAN, CUCUL_LIGHTGRAY
578    };
579
580    unsigned int j;
581
582    for(j = 0; j < argc; j++)
583    {
584        /* Defined in ECMA-48 8.3.117: SGR - SELECT GRAPHIC RENDITION */
585        if(argv[j] >= 30 && argv[j] <= 37)
586            g->fg = ansi2cucul[argv[j] - 30];
587        else if(argv[j] >= 40 && argv[j] <= 47)
588            g->bg = ansi2cucul[argv[j] - 40];
589        else if(argv[j] >= 90 && argv[j] <= 97)
590            g->fg = ansi2cucul[argv[j] - 90] + 8;
591        else if(argv[j] >= 100 && argv[j] <= 107)
592            g->bg = ansi2cucul[argv[j] - 100] + 8;
593        else switch(argv[j])
594        {
595        case 0: /* default rendition */
596            g->fg = g->dfg;
597            g->bg = g->dbg;
598            g->bold = g->negative = g->concealed = 0;
599            break;
600        case 1: /* bold or increased intensity */
601            g->bold = 1;
602            break;
603        case 4: /* singly underlined */
604            break;
605        case 5: /* slowly blinking (less then 150 per minute) */
606            break;
607        case 7: /* negative image */
608            g->negative = 1;
609            break;
610        case 8: /* concealed characters */
611            g->concealed = 1;
612            break;
613        case 22: /* normal colour or normal intensity (neither bold nor faint) */
614            g->bold = 0;
615            break;
616        case 28: /* revealed characters */
617            g->concealed = 0;
618            break;
619        case 39: /* default display colour (implementation-defined) */
620            g->fg = g->dfg;
621            break;
622        case 49: /* default background colour (implementation-defined) */
623            g->bg = g->dbg;
624            break;
625        default:
626            fprintf(stderr, "unknown sgr %i\n", argv[j]);
627            break;
628        }
629    }
630
631    if(g->concealed)
632    {
633        g->efg = g->ebg = CUCUL_TRANSPARENT;
634    }
635    else
636    {
637        g->efg = g->negative ? g->bg : g->fg;
638        g->ebg = g->negative ? g->fg : g->bg;
639
640        if(g->bold)
641        {
642            if(g->efg < 8)
643                g->efg += 8;
644            else if(g->efg == CUCUL_DEFAULT)
645                g->efg = CUCUL_WHITE;
646        }
647    }
648}
649
Note: See TracBrowser for help on using the repository browser.