source: libcaca/trunk/cucul/import.c @ 1349

Last change on this file since 1349 was 1349, checked in by Sam Hocevar, 16 years ago
  • Fixed a bug in the caca importer.
  • Property svn:keywords set to Id
File size: 19.1 KB
Line 
1/*
2 *  libcucul      Canvas for ultrafast compositing of Unicode letters
3 *  Copyright (c) 2002-2006 Sam Hocevar <sam@zoy.org>
4 *                All Rights Reserved
5 *
6 *  $Id: import.c 1349 2006-11-11 17:55:11Z sam $
7 *
8 *  This library is free software; you can redistribute it and/or
9 *  modify it under the terms of the Do What The Fuck You Want To
10 *  Public License, Version 2, as published by Sam Hocevar. See
11 *  http://sam.zoy.org/wtfpl/COPYING for more details.
12 */
13
14/*
15 *  This file contains various import functions.
16 */
17
18#include "config.h"
19#include "common.h"
20
21#if !defined __KERNEL__
22#   if defined HAVE_ERRNO_H
23#       include <errno.h>
24#   endif
25#   include <stdio.h>
26#   include <stdlib.h>
27#   include <string.h>
28#endif
29
30#include "cucul.h"
31#include "cucul_internals.h"
32
/* Copy four bytes from s into a uint32_t (memcpy is used so that s does not
 * need to be aligned) and return the result of running them through
 * hton32(). NOTE(review): hton32() is defined outside this file; presumably
 * this yields the host-order value of a big-endian field -- confirm. */
static inline uint32_t sscanu32(void const *s)
{
    uint32_t raw;

    memcpy(&raw, s, sizeof(raw));

    return hton32(raw);
}
39
/* Copy two bytes from s into a uint16_t (memcpy is used so that s does not
 * need to be aligned) and return the result of running them through
 * hton16(). NOTE(review): hton16() is defined outside this file; presumably
 * this yields the host-order value of a big-endian field -- confirm. */
static inline uint16_t sscanu16(void const *s)
{
    uint16_t raw;

    memcpy(&raw, s, sizeof(raw));

    return hton16(raw);
}
46
/* ANSI Graphic Rendition Combination Mode: the colour and attribute state
 * that ansi_parse_grcm() maintains while SGR sequences are interpreted. */
struct ansi_grcm
{
    /* Colours as requested by the ANSI stream */
    uint8_t fg;
    uint8_t bg;

    /* Effective (libcucul) colours, derived from the fields above once the
     * bold / negative / concealed flags have been applied */
    uint8_t efg;
    uint8_t ebg;

    /* Rendition flags, each either 0 or 1 */
    uint8_t bold;
    uint8_t negative;
    uint8_t concealed;
};
54
55static long int import_caca(cucul_canvas_t *, void const *, unsigned int);
56static long int import_text(cucul_canvas_t *, void const *, unsigned int);
57static long int import_ansi(cucul_canvas_t *, void const *, unsigned int, int);
58
59static void ansi_parse_grcm(cucul_canvas_t *, struct ansi_grcm *,
60                            unsigned int, unsigned int const *);
61
62/** \brief Import a memory buffer into a canvas
63 *
64 *  Import a memory buffer into the given libcucul canvas's current
65 *  frame. The current frame is resized accordingly and its contents are
66 *  replaced with the imported data.
67 *
68 *  Valid values for \c format are:
69 *  - \c "": attempt to autodetect the file format.
70 *  - \c "caca": import native libcaca files.
71 *  - \c "text": import ASCII text files.
72 *  - \c "ansi": import ANSI files.
73 *  - \c "utf8": import UTF-8 files with ANSI colour codes.
74 *
75 *  The number of bytes read is returned. If the file format is valid, but
76 *  not enough data was available, 0 is returned.
77 *
78 *  If an error occurs, -1 is returned and \b errno is set accordingly:
79 *  - \c ENOMEM Not enough memory to allocate canvas.
80 *  - \c EINVAL Invalid format requested.
81 *
82 *  \param cv A libcucul canvas in which to import the file.
83 *  \param data A memory area containing the data to be loaded into the canvas.
84 *  \param len The size in bytes of the memory area.
85 *  \param format A string describing the input format.
86 *  \return The number of bytes read, or 0 if there was not enough data,
87 *  or -1 if an error occurred.
88 */
89long int cucul_import_memory(cucul_canvas_t *cv, void const *data,
90                             unsigned long int len, char const *format)
91{
92    if(!strcasecmp("caca", format))
93        return import_caca(cv, data, len);
94    if(!strcasecmp("utf8", format))
95        return import_ansi(cv, data, len, 1);
96    if(!strcasecmp("text", format))
97        return import_text(cv, data, len);
98    if(!strcasecmp("ansi", format))
99        return import_ansi(cv, data, len, 0);
100
101    /* Autodetection */
102    if(!strcasecmp("", format))
103    {
104        unsigned char const *str = data;
105        unsigned int i;
106
107        /* If 4 first bytes are 0xcaca + 'CV' */
108        if(len >= 4 && str[0] == 0xca &&
109           str[1] == 0xca && str[2] == 'C' && str[3] == 'V')
110            return import_caca(cv, data, len);
111
112        /* If we find ESC[ argv, we guess it's an ANSI file */
113        for(i = 0; i + 1 < len; i++)
114            if((str[i] == 0x1b) && (str[i + 1] == '['))
115                return import_ansi(cv, data, len, 0);
116
117        /* Otherwise, import it as text */
118        return import_text(cv, data, len);
119    }
120
121#if defined HAVE_ERRNO_H
122    errno = EINVAL;
123#endif
124    return -1;
125}
126
127/** \brief Import a file into a canvas
128 *
129 *  Import a file into the given libcucul canvas's current frame. The
130 *  current frame is resized accordingly and its contents are replaced
131 *  with the imported data.
132 *
133 *  Valid values for \c format are:
134 *  - \c "": attempt to autodetect the file format.
135 *  - \c "caca": import native libcaca files.
136 *  - \c "text": import ASCII text files.
137 *  - \c "ansi": import ANSI files.
138 *  - \c "utf8": import UTF-8 files with ANSI colour codes.
139 *
140 *  The number of bytes read is returned. If the file format is valid, but
141 *  not enough data was available, 0 is returned.
142 *
143 *  If an error occurs, -1 is returned and \b errno is set accordingly:
144 *  - \c ENOSYS File access is not implemented on this system.
145 *  - \c ENOMEM Not enough memory to allocate canvas.
146 *  - \c EINVAL Invalid format requested.
147 *  cucul_import_file() may also fail and set \b errno for any of the
148 *  errors specified for the routine fopen().
149 *
150 *  \param cv A libcucul canvas in which to import the file.
151 *  \param filename The name of the file to load.
152 *  \param format A string describing the input format.
153 *  \return The number of bytes read, or 0 if there was not enough data,
154 *  or -1 if an error occurred.
155 */
156long int cucul_import_file(cucul_canvas_t *cv, char const *filename,
157                           char const *format)
158{
159#if defined __KERNEL__
160#   if defined HAVE_ERRNO_H
161    errno = ENOSYS;
162#   endif
163    return -1;
164#else
165    FILE *fp;
166    void *data;
167    long int size;
168    int ret;
169
170    fp = fopen(filename, "rb");
171    if(!fp)
172        return -1; /* fopen already set errno */
173
174    fseek(fp, 0, SEEK_END);
175    size = ftell(fp);
176
177    data = malloc(size);
178    if(!data)
179    {
180        fclose(fp);
181#   if defined HAVE_ERRNO_H
182        errno = ENOMEM;
183#   endif
184        return -1;
185    }
186
187    fseek(fp, 0, SEEK_SET);
188    fread(data, size, 1, fp);
189    fclose(fp);
190
191    ret = cucul_import_memory(cv, data, size, format);
192    free(data);
193
194    return ret;
195#endif
196}
197   
/** \brief Get available import formats
 *
 *  Return the table of supported import formats. The table is a flat array
 *  of string pairs: the first string of each pair is the internal name of
 *  the format, suitable as the \c format argument of cucul_import_memory()
 *  or cucul_import_file(); the second one is a natural language description
 *  of that format. The table ends with a pair of NULL pointers.
 *
 *  The returned array has static storage duration and must not be freed.
 *
 *  This function never fails.
 *
 *  \return An array of strings.
 */
char const * const * cucul_get_import_list(void)
{
    /* { internal name, description } pairs, closed by a NULL pair */
    static char const * const formats[] =
    {
        "",     "autodetect",
        "caca", "native libcaca format",
        "text", "plain text",
        "ansi", "ANSI coloured text",
        "utf8", "UTF-8 files with ANSI colour codes",
        NULL,   NULL
    };

    return &formats[0];
}
223
224/*
225 * XXX: the following functions are local.
226 */
227
228static long int import_caca(cucul_canvas_t *cv,
229                            void const *data, unsigned int size)
230{
231    uint8_t const *buf = (uint8_t const *)data;
232    unsigned int control_size, data_size, expected_size, frames, f, n;
233    uint16_t version, flags;
234
235    if(size < 20)
236        return 0;
237
238    if(buf[0] != 0xca || buf[1] != 0xca || buf[2] != 'C' || buf[3] != 'V')
239        goto invalid_caca;
240
241    control_size = sscanu32(buf + 4);
242    data_size = sscanu32(buf + 8);
243    version = sscanu16(buf + 12);
244    frames = sscanu32(buf + 14);
245    flags = sscanu16(buf + 18);
246
247    if(size < 4 + control_size + data_size)
248        return 0;
249
250    if(control_size < 16 + frames * 32)
251        goto invalid_caca;
252
253    for(expected_size = 0, f = 0; f < frames; f++)
254    {
255        unsigned int width, height, duration;
256        uint32_t attr;
257        int x, y, handlex, handley;
258
259        width = sscanu32(buf + 4 + 16 + f * 24);
260        height = sscanu32(buf + 4 + 16 + f * 24 + 4);
261        duration = sscanu32(buf + 4 + 16 + f * 24 + 8);
262        attr = sscanu32(buf + 4 + 16 + f * 24 + 12);
263        x = (int32_t)sscanu32(buf + 4 + 16 + f * 24 + 16);
264        y = (int32_t)sscanu32(buf + 4 + 16 + f * 24 + 20);
265        handlex = (int32_t)sscanu32(buf + 4 + 16 + f * 24 + 24);
266        handley = (int32_t)sscanu32(buf + 4 + 16 + f * 24 + 28);
267
268        expected_size += width * height * 8;
269    }
270
271    if(expected_size != data_size)
272        goto invalid_caca;
273
274    /* FIXME: read all frames, not only the first one */
275    cucul_set_canvas_size(cv, 0, 0);
276    cucul_set_canvas_size(cv, sscanu32(buf + 4 + 16),
277                              sscanu32(buf + 4 + 16 + 4));
278
279    /* FIXME: check for return value */
280
281    for(n = sscanu32(buf + 4 + 16) * sscanu32(buf + 4 + 16 + 4); n--; )
282    {
283        cv->chars[n] = sscanu32(buf + 4 + control_size + 8 * n);
284        cv->attrs[n] = sscanu32(buf + 4 + control_size + 8 * n + 4);
285    }
286
287    cv->curattr = sscanu32(buf + 4 + 16 + 12);
288    cv->frames[0].x = (int32_t)sscanu32(buf + 4 + 16 + 0 * 24 + 16);
289    cv->frames[0].y = (int32_t)sscanu32(buf + 4 + 16 + 0 * 24 + 20);
290    cv->frames[0].handlex = (int32_t)sscanu32(buf + 4 + 16 + 0 * 24 + 24);
291    cv->frames[0].handley = (int32_t)sscanu32(buf + 4 + 16 + 0 * 24 + 28);
292
293    return 4 + control_size + data_size;
294
295invalid_caca:
296#if defined HAVE_ERRNO_H
297    errno = EINVAL;
298#endif
299    return -1;
300}
301
302static long int import_text(cucul_canvas_t *cv,
303                            void const *data, unsigned int size)
304{
305    char const *text = (char const *)data;
306    unsigned int width = 0, height = 0, x = 0, y = 0, i;
307
308    cucul_set_canvas_size(cv, width, height);
309
310    for(i = 0; i < size; i++)
311    {
312        unsigned char ch = *text++;
313
314        if(ch == '\r')
315            continue;
316
317        if(ch == '\n')
318        {
319            x = 0;
320            y++;
321            continue;
322        }
323
324        if(x >= width || y >= height)
325        {
326            if(x >= width)
327                width = x + 1;
328
329            if(y >= height)
330                height = y + 1;
331
332            cucul_set_canvas_size(cv, width, height);
333        }
334
335        cucul_put_char(cv, x, y, ch);
336        x++;
337    }
338
339    if(y > height)
340        cucul_set_canvas_size(cv, width, height = y);
341
342    return size;
343}
344
345static long int import_ansi(cucul_canvas_t *cv,
346                            void const *data, unsigned int size, int utf8)
347{
348    struct ansi_grcm grcm;
349    unsigned char const *buffer = (unsigned char const*)data;
350    unsigned int i, j, skip, dummy = 0;
351    unsigned int width = 0, height = 0, wch = 1;
352    unsigned long int ch;
353    int x = 0, y = 0, save_x = 0, save_y = 0;
354
355    cucul_set_canvas_size(cv, width, height);
356    ansi_parse_grcm(cv, &grcm, 1, &dummy);
357
358    for(i = 0; i < size; i += skip)
359    {
360        skip = 1;
361
362        /* Wrap long lines */
363        if((unsigned int)x >= 80)
364        {
365            x = 0;
366            y++;
367        }
368
369        if(buffer[i] == '\x1a' && size - i >= 8
370           && !memcmp(buffer + i + 1, "SAUCE00", 7))
371            break; /* End before SAUCE data */
372
373        if(buffer[i] == '\r')
374            continue; /* DOS sucks */
375
376        if(buffer[i] == '\n')
377        {
378            x = 0;
379            y++;
380            continue;
381        }
382
383        /* Interpret escape commands, as per Standard ECMA-48 "Control
384         * Functions for Coded Character Sets", 5.4. Control sequences. */
385        if(buffer[i] == '\x1b' && buffer[i + 1] == '[')
386        {
387            unsigned int argc = 0, argv[101];
388            unsigned int param, inter, final;
389
390        /* Compute offsets to parameter bytes, intermediate bytes and
391         * to the final byte. Only the final byte is mandatory, there
392         * can be zero of the others.
393         * 0  param=2             inter                 final           final+1
394         * +-----+------------------+---------------------+-----------------+
395         * | CSI | parameter bytes  | intermediate bytes  |   final byte    |
396         * |     |   0x30 - 0x3f    |    0x20 - 0x2f      |   0x40 - 0x7e   |
397         * | ^[[ | 0123456789:;<=>? | SPC !"#$%&'()*+,-./ | azAZ@[\]^_`{|}~ |
398         * +-----+------------------+---------------------+-----------------+
399         */
400            param = 2;
401
402            for(inter = param; i + inter < size; inter++)
403                if(buffer[i + inter] < 0x30 || buffer[i + inter] > 0x3f)
404                    break;
405
406            for(final = inter; i + final < size; final++)
407                if(buffer[i + final] < 0x20 || buffer[i + final] > 0x2f)
408                    break;
409
410            if(buffer[i + final] < 0x40 || buffer[i + final] > 0x7e)
411                break; /* Invalid Final Byte */
412
413            skip += final;
414
415            /* Sanity checks */
416            if(param < inter && buffer[i + param] >= 0x3c)
417            {
418                fprintf(stderr, "private sequence \"^[[%.*s\"\n",
419                        final - param + 1, buffer + i + param);
420                continue; /* Private sequence, skip it entirely */
421            }
422
423            if(final - param > 100)
424                continue; /* Suspiciously long sequence, skip it */
425
426            /* Parse parameter bytes as per ECMA-48 5.4.2: Parameter string
427             * format */
428            if(param < inter)
429            {
430                argv[0] = 0;
431                for(j = param; j < inter; j++)
432                {
433                    if(buffer[i + j] == ';')
434                        argv[++argc] = 0;
435                    else if(buffer[i + j] >= '0' && buffer[i + j] <= '9')
436                        argv[argc] = 10 * argv[argc] + (buffer[i + j] - '0');
437                }
438                argc++;
439            }
440
441            /* Interpret final byte. The code representations are given in
442             * ECMA-48 5.4: Control sequences, and the code definitions are
443             * given in ECMA-48 8.3: Definition of control functions. */
444            switch(buffer[i + final])
445            {
446            case 'f': /* CUP - Cursor Position */
447            case 'H': /* HVP - Character And Line Position */
448                x = (argc > 1 && argv[1] > 0) ? argv[1] - 1 : 0;
449                y = (argc > 0 && argv[0] > 0) ? argv[0] - 1 : 0;
450                break;
451            case 'A': /* CUU - Cursor Up */
452                y -= argc ? argv[0] : 1;
453                if(y < 0)
454                    y = 0;
455                break;
456            case 'B': /* CUD - Cursor Down */
457                y += argc ? argv[0] : 1;
458                break;
459            case 'C': /* CUF - Cursor Right */
460                x += argc ? argv[0] : 1;
461                break;
462            case 'D': /* CUB - Cursor Left */
463                x -= argc ? argv[0] : 1;
464                if(x < 0)
465                    x = 0;
466                break;
467            case 's': /* Private (save cursor position) */
468                save_x = x;
469                save_y = y;
470                break;
471            case 'u': /* Private (reload cursor position) */
472                x = save_x;
473                y = save_y;
474                break;
475            case 'J': /* ED - Erase In Page */
476                if(argv[0] == 2)
477                    x = y = 0;
478                break;
479            case 'K': /* EL - Erase In Line */
480                if(width < 80)
481                    cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
482                    cucul_set_canvas_size(cv, width = 80, height);
483                for(j = x; j < 80; j++)
484                    cucul_put_char(cv, j, y, ' ');
485                x = 80;
486                break;
487            case 'm': /* SGR - Select Graphic Rendition */
488                ansi_parse_grcm(cv, &grcm, argc, argv);
489                break;
490            default:
491                fprintf(stderr, "unknown command %c\n", buffer[i + final]);
492                break;
493            }
494
495            continue;
496        }
497
498        /* Get the character we’re going to paste */
499        if(utf8)
500        {
501            unsigned int bytes;
502
503            if(i + 6 < size)
504                ch = cucul_utf8_to_utf32((char const *)(buffer + i), &bytes);
505            else
506            {
507                /* Add a trailing zero to what we're going to read */
508                char tmp[7];
509                memcpy(tmp, buffer + i, size - i);
510                tmp[size - i] = '\0';
511                ch = cucul_utf8_to_utf32(tmp, &bytes);
512            }
513
514            if(!bytes)
515            {
516                /* If the Unicode is invalid, assume it was latin1. */
517                ch = buffer[i];
518                bytes = 1;
519            }
520            wch = cucul_utf32_is_fullwidth(ch) ? 2 : 1;
521            skip += bytes - 1;
522        }
523        else
524        {
525            ch = cucul_cp437_to_utf32(buffer[i]);
526        }
527
528        /* Make sure the canvas is big enough. */
529        if((unsigned int)x + wch > width)
530        {
531            cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
532            cucul_set_canvas_size(cv, width = x + wch, height);
533        }
534
535        if((unsigned int)y >= height)
536        {
537            cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
538            cucul_set_canvas_size(cv, width, height = y + 1);
539        }
540
541        /* Now paste our character */
542        cucul_set_color_ansi(cv, grcm.efg, grcm.ebg);
543        cucul_put_char(cv, x, y, ch);
544        x += wch;
545    }
546
547    if((unsigned int)y > height)
548    {
549        cucul_set_color_ansi(cv, CUCUL_DEFAULT, CUCUL_TRANSPARENT);
550        cucul_set_canvas_size(cv, width, height = y);
551    }
552
553    return size;
554}
555
556/* XXX : ANSI loader helper */
557
558static void ansi_parse_grcm(cucul_canvas_t *cv, struct ansi_grcm *g,
559                            unsigned int argc, unsigned int const *argv)
560{
561    static uint8_t const ansi2cucul[] =
562    {
563        CUCUL_BLACK, CUCUL_RED, CUCUL_GREEN, CUCUL_BROWN,
564        CUCUL_BLUE, CUCUL_MAGENTA, CUCUL_CYAN, CUCUL_LIGHTGRAY
565    };
566
567    unsigned int j;
568
569    for(j = 0; j < argc; j++)
570    {
571        /* Defined in ECMA-48 8.3.117: SGR - SELECT GRAPHIC RENDITION */
572        if(argv[j] >= 30 && argv[j] <= 37)
573            g->fg = ansi2cucul[argv[j] - 30];
574        else if(argv[j] >= 40 && argv[j] <= 47)
575            g->bg = ansi2cucul[argv[j] - 40];
576        else if(argv[j] >= 90 && argv[j] <= 97)
577            g->fg = ansi2cucul[argv[j] - 90] + 8;
578        else if(argv[j] >= 100 && argv[j] <= 107)
579            g->bg = ansi2cucul[argv[j] - 100] + 8;
580        else switch(argv[j])
581        {
582        case 0: /* default rendition */
583            g->fg = CUCUL_DEFAULT;
584            g->bg = CUCUL_TRANSPARENT;
585            g->bold = g->negative = g->concealed = 0;
586            break;
587        case 1: /* bold or increased intensity */
588            g->bold = 1;
589            break;
590        case 4: /* singly underlined */
591            break;
592        case 5: /* slowly blinking (less then 150 per minute) */
593            break;
594        case 7: /* negative image */
595            g->negative = 1;
596            break;
597        case 8: /* concealed characters */
598            g->concealed = 1;
599            break;
600        case 22: /* normal colour or normal intensity (neither bold nor faint) */
601            g->bold = 0;
602            break;
603        case 28: /* revealed characters */
604            g->concealed = 0;
605            break;
606        case 39: /* default display colour (implementation-defined) */
607            g->fg = CUCUL_DEFAULT;
608            break;
609        case 49: /* default background colour (implementation-defined) */
610            g->bg = CUCUL_TRANSPARENT;
611            break;
612        default:
613            fprintf(stderr, "unknown sgr %i\n", argv[j]);
614            break;
615        }
616    }
617
618    if(g->concealed)
619    {
620        g->efg = g->ebg = CUCUL_TRANSPARENT;
621    }
622    else
623    {
624        g->efg = g->negative ? g->bg : g->fg;
625        g->ebg = g->negative ? g->fg : g->bg;
626
627        if(g->bold)
628        {
629            if(g->efg < 8)
630                g->efg += 8;
631            else if(g->efg == CUCUL_DEFAULT)
632                g->efg = CUCUL_WHITE;
633        }
634    }
635}
636
Note: See TracBrowser for help on using the repository browser.