source: zzuf/trunk/src/zzcat.c @ 4245

Last change on this file since 4245 was 4245, checked in by Sam Hocevar, 10 years ago

Move zzcat to the main src/ directory. It's becoming mature enough to be
distributed.

  • Property svn:keywords set to Id
File size: 14.2 KB
Line 
1/*
2 *  zzcat - various cat reimplementations for testing purposes
3 *  Copyright (c) 2006-2010 Sam Hocevar <sam@hocevar.net>
4 *                All Rights Reserved
5 *
6 *  $Id: zzcat.c 4245 2010-01-12 23:53:09Z sam $
7 *
8 *  This program is free software. It comes without any warranty, to
9 *  the extent permitted by applicable law. You can redistribute it
10 *  and/or modify it under the terms of the Do What The Fuck You Want
11 *  To Public License, Version 2, as published by Sam Hocevar. See
12 *  http://sam.zoy.org/wtfpl/COPYING for more details.
13 */
14
15/*
16 * TODO: fsetpos64, fgetln
17 */
18
19#include "config.h"
20
21/* Needed for lseek64() */
22#define _LARGEFILE64_SOURCE
23/* Needed for O_RDONLY on HP-UX */
24#define _INCLUDE_POSIX_SOURCE
25/* Needed for fgets_unlocked() */
26#define _GNU_SOURCE
27
28#if defined HAVE_STDINT_H
29#   include <stdint.h>
30#elif defined HAVE_INTTYPES_H
31#   include <inttypes.h>
32#endif
33#include <sys/types.h>
34#include <sys/stat.h>
35#include <fcntl.h>
36#if defined HAVE_UNISTD_H
37#   include <unistd.h>
38#endif
39#if defined HAVE_SYS_MMAN_H
40#   include <sys/mman.h>
41#endif
42#include <stdlib.h>
43#include <stdio.h>
44#include <string.h>
45
/*
 * Small deterministic pseudo-random generator used by the test routines.
 *
 * The state lives in a function-local static, so the sequence is the same
 * on every run. All arithmetic is done on unsigned values: the mixing steps
 * overflow on purpose, and unsigned overflow wraps in a well-defined way
 * whereas the signed equivalent is undefined behaviour.
 *
 * Returns the new state, which is also the next value of the sequence.
 */
static inline unsigned int myrand(void)
{
    static unsigned int seed = 1;
    unsigned int x, y;

    x = (seed + 0x12345678u) << 11;
    y = (seed + 0xfedcba98u) >> 21;
    seed = x * 1010101u + y * 343434u;

    return seed;
}
55
/*
 * Run cmd, which is expected to assign the stream variable f, and check
 * that a stream was obtained.
 *
 * Must be expanded inside cat_file(): it uses the locals f, file, p,
 * retoff, retbuf and tmp. On failure it prints an error, releases the two
 * heap buffers and makes the enclosing function return EXIT_FAILURE. On
 * success it resets the output offset to 0 and moves the script pointer
 * just past the closing parenthesis of the current command.
 */
#define MY_FOPEN(cmd) \
    do { \
        cmd; \
        if (!f) \
        { \
            fprintf(stderr, "E: zzcat: cannot open `%s'\n", file); \
            free(retbuf); \
            free(tmp); \
            return EXIT_FAILURE; \
        } \
        retoff = 0; \
        p = strchr(p, ')') + 1; \
    } while(0)
67
/*
 * Run cmd, which is expected to close the stream f, then forget the stream
 * and move the script pointer just past the closing parenthesis of the
 * current command.
 *
 * cmd is skipped when no stream is open, so a script that closes a file it
 * never opened does not hand a null pointer to fclose().
 *
 * Must be expanded where the locals f and p are in scope.
 */
#define MY_FCLOSE(cmd) \
    do { \
        if (f) \
        { \
            cmd; \
        } \
        f = NULL; \
        p = strchr(p, ')') + 1; \
    } while(0)
74
/*
 * Copy cnt bytes from address into the output image at offset retoff, then
 * advance retoff by off.
 *
 * Must be expanded where the locals retbuf, retlen and retoff are in scope.
 * off is converted to size_t, so a negative displacement is expressed by
 * its wrapped-around value and still moves retoff backwards.
 *
 * The image grows on demand. Bytes that are added by growing but not
 * written by this copy (the hole left by an earlier forward seek) are
 * zeroed, so the data finally written out never contains uninitialised
 * heap memory. If the image cannot be grown, or if retoff + cnt does not
 * fit in a size_t, an error is printed and the enclosing function returns
 * EXIT_FAILURE.
 */
#define MERGE(address, cnt, off) \
    do { \
        size_t _cnt = (cnt), _off = (off); \
        if (_cnt) \
        { \
            char *_newbuf = retbuf; \
            size_t _newlen = retlen; \
            if (_cnt > (size_t)-1 - retoff) \
                _newbuf = NULL; \
            else if (retoff + _cnt > retlen) \
            { \
                _newlen = retoff + _cnt; \
                _newbuf = realloc(retbuf, _newlen); \
                if (_newbuf) \
                    memset(_newbuf + retlen, 0, _newlen - retlen); \
            } \
            if (!_newbuf) \
            { \
                fprintf(stderr, "E: zzcat: out of memory\n"); \
                return EXIT_FAILURE; \
            } \
            retbuf = _newbuf; \
            retlen = _newlen; \
            memcpy(retbuf + retoff, address, _cnt); \
        } \
        retoff += _off; \
    } while(0)
87
/*
 * Make sure the stream f is open, opening file for reading if it is not.
 *
 * Must be expanded inside cat_file(): it uses the locals f, file, retbuf
 * and tmp. On failure it prints an error, releases the two heap buffers and
 * makes the enclosing function return EXIT_FAILURE.
 */
#define MY_ENSURE_OPEN() \
    do { \
        if (!f) \
        { \
            f = fopen(file, "r"); \
            if (!f) \
            { \
                fprintf(stderr, "E: zzcat: cannot open `%s'\n", file); \
                free(retbuf); \
                free(tmp); \
                return EXIT_FAILURE; \
            } \
        } \
    } while(0)

/* A read command: cnt bytes were produced in buf and the offset moves by cnt */
#define MY_FREAD(cmd, buf, cnt) MY_FCALL(cmd, buf, cnt, cnt)
/* A seek command: nothing is copied, the offset moves by off */
#define MY_FSEEK(cmd, off) MY_FCALL(cmd, /* unused */ "", 0, off)

/*
 * Generic stream command: open the file if needed, run cmd, merge cnt bytes
 * from buf into the output image, move the output offset by off, and move
 * the script pointer p just past the closing parenthesis of the command.
 */
#define MY_FCALL(cmd, buf, cnt, off) \
    do { \
        MY_ENSURE_OPEN(); \
        cmd; \
        MERGE(buf, cnt, off); \
        p = strchr(p, ')') + 1; \
    } while(0)

/*
 * EOF detection command: open the file if needed and count one more
 * end-of-file observation in feofs when feof(f) is set. Once feofs reaches
 * the command argument l1, finish is raised. The script pointer p then
 * moves just past the closing parenthesis of the command.
 */
#define MY_FEOF() \
    do { \
        MY_ENSURE_OPEN(); \
        if (feof(f)) \
            feofs++; \
        if (feofs >= l1) \
            finish = 1; \
        p = strchr(p, ')') + 1; \
    } while(0)
124
/*
 * Command parser. We rewrite fmt by replacing the last character with
 * '%c' and check that the sscanf() call returns the expected number of
 * matches plus one (for the last character), and that the extra character
 * read is the one fmt ended with. We use this macro trick to avoid using
 * vsscanf() which does not exist on all platforms.
 */

struct parser
{
    char tmpfmt[1024], ch, lastch;
    /* Number of values sscanf() must store for the command to match */
    int nconv;
};

/*
 * Build p->tmpfmt from fmt as described above and remember the replaced
 * character in p->lastch.
 *
 * Returns the number of '%' conversions in the rewritten format, i.e. the
 * value sscanf() returns on a full match. If fmt is empty or too long for
 * p->tmpfmt, the format is left empty and EOF - 1 is returned; sscanf()
 * returns either EOF or a non-negative count, so such a format can never be
 * reported as matching.
 */
static int make_fmt(struct parser *p, char const *fmt)
{
    char const *tmp;
    size_t len;
    int ret = 0;

    len = strlen(fmt);
    if (len == 0 || len + 2 > sizeof(p->tmpfmt))
    {
        p->tmpfmt[0] = '\0';
        p->lastch = '\0';
        return EOF - 1;
    }

    p->lastch = fmt[len - 1];

    memcpy(p->tmpfmt, fmt, len - 1);
    p->tmpfmt[len - 1] = '%';
    p->tmpfmt[len] = 'c';
    p->tmpfmt[len + 1] = '\0';

    /* Count conversions; the character after each '%' is skipped */
    for (tmp = p->tmpfmt; *tmp; tmp++)
        if (*tmp == '%')
            tmp++, ret++;

    return ret;
}

/*
 * True when the script text at p matches fmt. Expects a local
 * "struct parser parser" and the script pointer p to be in scope.
 *
 * The comma operator sequences make_fmt() before sscanf(): the operands of
 * == may be evaluated in either order, so comparing the two calls directly
 * could run sscanf() on a format that has not been built yet.
 */
#define PARSECMD(fmt, ...) \
    (parser.nconv = make_fmt(&parser, fmt), \
     sscanf(p, parser.tmpfmt, ##__VA_ARGS__, &parser.ch) == parser.nconv \
         && parser.ch == parser.lastch)
161
162/*
163 * File reader. We parse a command line and perform all the operations it
164 * contains on the specified file.
165 */
166
167static int cat_file(char const *p, char const *file)
168{
169    struct { char const *p; int count; } loops[128];
170    char *retbuf = NULL, *tmp;
171    FILE *f = NULL;
172    size_t retlen = 0, retoff = 0;
173    int nloops = 0, fd = -1, feofs = 0, finish = 0;
174
175    /* Allocate 32MB for our temporary buffer. Any larger value will crash. */
176    tmp = malloc(32 * 1024 * 1024);
177
178    while (*p)
179    {
180        struct parser parser;
181        long int l1, l2;
182        char *s, *lineptr = NULL;
183        size_t k;
184        ssize_t l;
185        int n;
186        char ch;
187
188        (void)k;
189
190        /* Ignore punctuation */
191        if (strchr(" \t,;\r\n", *p))
192            p++;
193
194        /* Loop handling */
195        else if (PARSECMD("repeat ( %li ,", &l1))
196        {
197            p = strchr(p, ',') + 1;
198            loops[nloops].p = p;
199            loops[nloops].count = l1;
200            nloops++;
201        }
202        else if (PARSECMD(")"))
203        {
204            if (nloops == 0)
205            {
206                fprintf(stderr, "E: zzcat: ')' outside a loop\n");
207                return EXIT_FAILURE;
208            }
209            loops[nloops - 1].count--;
210            if (loops[nloops - 1].count <= 0 || finish)
211            {
212                nloops--;
213                p = strchr(p, ')') + 1;
214            }
215            else
216            {
217                p = loops[nloops - 1].p;
218            }
219
220            finish = 0;
221        }
222
223        /* FILE * opening functions */
224        else if (PARSECMD("fopen ( )"))
225            MY_FOPEN(f = fopen(file, "r"));
226#if defined HAVE_FOPEN64
227        else if (PARSECMD("fopen64 ( )"))
228            MY_FOPEN(f = fopen64(file, "r"));
229#endif
230#if defined HAVE___FOPEN64
231        else if (PARSECMD("__fopen64 ( )"))
232            MY_FOPEN(f = __fopen64(file, "r"));
233#endif
234        else if (PARSECMD("freopen ( )"))
235            MY_FOPEN(f = freopen(file, "r", f));
236#if defined HAVE_FREOPEN64
237        else if (PARSECMD("freopen64 ( )"))
238            MY_FOPEN(f = freopen64(file, "r", f));
239#endif
240#if defined HAVE___FREOPEN64
241        else if (PARSECMD("__freopen64 ( )"))
242            MY_FOPEN(f = __freopen64(file, "r", f));
243#endif
244
245        /* FILE * EOF detection */
246        else if (PARSECMD("feof ( %li )", &l1))
247            MY_FEOF();
248
249        /* FILE * closing functions */
250        else if (PARSECMD("fclose ( )"))
251            MY_FCLOSE(fclose(f));
252
253        /* FILE * reading functions */
254        else if (PARSECMD("fread ( %li , %li )", &l1, &l2))
255            MY_FREAD(l = fread(tmp, l1, l2, f), tmp, l > 0 ? l * l1 : 0);
256        else if (PARSECMD("getc ( )"))
257            MY_FREAD(ch = (n = getc(f)), &ch, (n != EOF));
258        else if (PARSECMD("fgetc ( )"))
259            MY_FREAD(ch = (n = fgetc(f)), &ch, (n != EOF));
260        else if (PARSECMD("fgets ( %li )", &l1))
261            MY_FREAD(s = fgets(tmp, l1, f), tmp, s ? strlen(tmp) : 0);
262#if defined HAVE__IO_GETC
263        else if (PARSECMD("_IO_getc ( )"))
264            MY_FREAD(ch = (n = _IO_getc(f)), &ch, (n != EOF));
265#endif
266#if defined HAVE_FREAD_UNLOCKED
267        else if (PARSECMD("fread_unlocked ( %li , %li )", &l1, &l2))
268            MY_FREAD(l = fread_unlocked(tmp, l1, l2, f), tmp, l > 0 ? l * l1 : 0);
269#endif
270#if defined HAVE_FGETS_UNLOCKED
271        else if (PARSECMD("fgets_unlocked ( %li )", &l1))
272            MY_FREAD(s = fgets_unlocked(tmp, l1, f), tmp, s ? strlen(tmp) : 0);
273#endif
274#if defined HAVE_GETC_UNLOCKED
275        else if (PARSECMD("getc_unlocked ( )"))
276            MY_FREAD(ch = (n = getc_unlocked(f)), &ch, (n != EOF));
277#endif
278#if defined HAVE_FGETC_UNLOCKED
279        else if (PARSECMD("fgetc_unlocked ( )"))
280            MY_FREAD(ch = (n = fgetc_unlocked(f)), &ch, (n != EOF));
281#endif
282
283        /* FILE * getdelim functions */
284#if defined HAVE_GETLINE
285        else if (PARSECMD("getline ( )"))
286            MY_FREAD(l = getline(&lineptr, &k, f), lineptr, l >= 0 ? l : 0);
287#endif
288#if defined HAVE_GETDELIM
289        else if (PARSECMD("getdelim ( '%c' )", &ch))
290            MY_FREAD(l = getdelim(&lineptr, &k, ch, f), lineptr, l >= 0 ? l : 0);
291        else if (PARSECMD("getdelim ( %i )", &n))
292            MY_FREAD(l = getdelim(&lineptr, &k, n, f), lineptr, l >= 0 ? l : 0);
293#endif
294#if defined HAVE___GETDELIM
295        else if (PARSECMD("__getdelim ( '%c' )", &ch))
296            MY_FREAD(l = __getdelim(&lineptr, &k, ch, f), lineptr, l >= 0 ? l : 0);
297        else if (PARSECMD("__getdelim ( %i )", &n))
298            MY_FREAD(l = __getdelim(&lineptr, &k, n, f), lineptr, l >= 0 ? l : 0);
299#endif
300
301        /* FILE * seeking functions */
302        else if (PARSECMD("fseek ( %li , SEEK_CUR )", &l1))
303            MY_FSEEK(l = fseek(f, l1, SEEK_CUR),
304                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
305        else if (PARSECMD("fseek ( %li , SEEK_SET )", &l1))
306            MY_FSEEK(l = fseek(f, l1, SEEK_SET),
307                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
308        else if (PARSECMD("fseek ( %li , SEEK_END )", &l1))
309            MY_FSEEK(l = fseek(f, l1, SEEK_END),
310                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
311#if defined HAVE_FSEEKO
312        else if (PARSECMD("fseeko ( %li , SEEK_CUR )", &l1))
313            MY_FSEEK(l = fseeko(f, l1, SEEK_CUR),
314                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
315        else if (PARSECMD("fseeko ( %li , SEEK_SET )", &l1))
316            MY_FSEEK(l = fseeko(f, l1, SEEK_SET),
317                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
318        else if (PARSECMD("fseeko ( %li , SEEK_END )", &l1))
319            MY_FSEEK(l = fseeko(f, l1, SEEK_END),
320                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
321#endif
322#if defined HAVE_FSEEKO64
323        else if (PARSECMD("fseeko64 ( %li , SEEK_CUR )", &l1))
324            MY_FSEEK(l = fseeko64(f, l1, SEEK_CUR),
325                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
326        else if (PARSECMD("fseeko64 ( %li , SEEK_SET )", &l1))
327            MY_FSEEK(l = fseeko64(f, l1, SEEK_SET),
328                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
329        else if (PARSECMD("fseeko64 ( %li , SEEK_END )", &l1))
330            MY_FSEEK(l = fseeko64(f, l1, SEEK_END),
331                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
332#endif
333#if defined HAVE___FSEEKO64
334        else if (PARSECMD("__fseeko64 ( %li , SEEK_CUR )", &l1))
335            MY_FSEEK(l = __fseeko64(f, l1, SEEK_CUR),
336                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
337        else if (PARSECMD("__fseeko64 ( %li , SEEK_SET )", &l1))
338            MY_FSEEK(l = __fseeko64(f, l1, SEEK_SET),
339                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
340        else if (PARSECMD("__fseeko64 ( %li , SEEK_END )", &l1))
341            MY_FSEEK(l = __fseeko64(f, l1, SEEK_END),
342                     ftell(f) >= 0 ? ftell(f) - retoff : 0);
343#endif
344        else if (PARSECMD("rewind ( )"))
345            MY_FSEEK(rewind(f), -retlen);
346        else if (PARSECMD("ungetc ( )"))
347            MY_FSEEK(if(retoff) ungetc((unsigned char)retbuf[retoff - 1], f),
348                     retoff ? -1 : 0);
349
350        /* Unrecognised sequence */
351        else
352        {
353            char buf[16];
354            snprintf(buf, 16, strlen(p) < 16 ? "%s" : "%.12s...", p);
355            fprintf(stderr, "E: zzcat: syntax error near `%s'\n", buf);
356            return EXIT_FAILURE;
357        }
358
359        /* Clean up our mess */
360        if (lineptr)
361            free(lineptr);
362
363        if (finish && !nloops)
364            break;
365    }
366
367    if (f)
368        fclose(f);
369
370    if (fd >= 0)
371        close(fd);
372
373    fwrite(retbuf, retlen, 1, stdout);
374
375    free(retbuf);
376    free(tmp);
377
378    return EXIT_SUCCESS;
379}
380
381/*
382 * Main program.
383 */
384
/*
 * Entry point.
 *
 *   zzcat FILE               dump FILE with one large fread()
 *   zzcat COMMANDS FILE...   run the COMMANDS script on each FILE in turn
 *
 * Stops at the first file for which cat_file() reports a failure and
 * returns that status; returns EXIT_SUCCESS when every file was handled.
 */
int main(int argc, char *argv[])
{
    char const *commands;
    int first, idx;

    if (argc < 2)
    {
        fprintf(stderr, "E: zzcat: too few arguments\n");
        return EXIT_FAILURE;
    }

    if (argc == 2)
    {
        /* Only a file name was given: use the default script */
        commands = "fread(1,33554432)";
        first = 1;
    }
    else
    {
        /* First argument is the script, the others are file names */
        commands = argv[1];
        first = 2;
    }

    for (idx = first; idx < argc; idx++)
    {
        int status = cat_file(commands, argv[idx]);
        if (status)
            return status;
    }

    return EXIT_SUCCESS;
}
407
408#if 0
/*
 * Only read() calls.
 *
 * Reads up to len bytes of the file called name into data, using read()
 * requests of at most chunk bytes. The last request is shortened so that
 * nothing is ever stored past data + len, and reading stops early at end
 * of file.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the file cannot be opened,
 * when chunk is not positive although data was requested, or when read()
 * reports an error.
 */
static int zzcat_read(char const *name, unsigned char *data, int64_t len,
                      int64_t chunk)
{
    int64_t off = 0;
    int ret = EXIT_SUCCESS;
    int fd = open(name, O_RDONLY);

    if(fd < 0)
        return EXIT_FAILURE;

    /* A non-positive chunk size could never make progress */
    if(len > 0 && chunk <= 0)
    {
        close(fd);
        return EXIT_FAILURE;
    }

    while(off < len)
    {
        int64_t want = len - off < chunk ? len - off : chunk;
        ssize_t got = read(fd, data + off, (size_t)want);

        if(got < 0)
        {
            ret = EXIT_FAILURE;
            break;
        }
        if(got == 0)
            break; /* end of file */
        off += got;
    }

    close(fd);
    return ret;
}
421
/*
 * Read a pseudo-random number of bytes (below 4096) from fd at its current
 * position into the matching place of data, never storing anything at or
 * past data + len. Returns 0, or -1 when read() reports an error.
 */
static int zzcat_read_at_cursor(int fd, unsigned char *data, int64_t len)
{
    /* The random value is always drawn, even if no read takes place */
    int64_t want = myrand() % 4096;
    int64_t pos = lseek(fd, 0, SEEK_CUR);

    if(pos < 0 || pos >= len)
        return 0;
    if(want > len - pos)
        want = len - pos;

    return read(fd, data + pos, (size_t)want) < 0 ? -1 : 0;
}

/*
 * Socket seeks and reads.
 *
 * Opens name with open() and performs 128 rounds of: lseek() to a
 * pseudo-random offset below len, then four reads of pseudo-random size
 * into data at the descriptor position. When lseek64() is available the
 * round is repeated with it.
 *
 * data must be able to hold len bytes. Returns EXIT_SUCCESS, or
 * EXIT_FAILURE when len is not positive, the file cannot be opened or a
 * read fails.
 */
static int zzcat_random_socket(char const *name, unsigned char *data,
                               int64_t len)
{
    int i, j, fd;

    /* Offsets are drawn modulo len, so len must be positive */
    if(len <= 0)
        return EXIT_FAILURE;

    fd = open(name, O_RDONLY);
    if(fd < 0)
        return EXIT_FAILURE;

    for(i = 0; i < 128; i++)
    {
        lseek(fd, myrand() % len, SEEK_SET);
        for(j = 0; j < 4; j++)
            if(zzcat_read_at_cursor(fd, data, len) < 0)
            {
                close(fd);
                return EXIT_FAILURE;
            }
#ifdef HAVE_LSEEK64
        lseek64(fd, myrand() % len, SEEK_SET);
        for(j = 0; j < 4; j++)
            if(zzcat_read_at_cursor(fd, data, len) < 0)
            {
                close(fd);
                return EXIT_FAILURE;
            }
#endif
    }

    close(fd);
    return EXIT_SUCCESS;
}
443
/*
 * Standard stream seeks and reads.
 *
 * Opens name with fopen() and performs 128 rounds of: fseek() to a
 * pseudo-random offset below len followed by four fread() calls of
 * pseudo-random size, then another fseek() followed by 16 getc() and
 * 16 fgetc() calls. Bytes are stored in data at the stream position they
 * were read from; end-of-file results and bytes that would land at or past
 * data + len are discarded.
 *
 * data must be able to hold len bytes. Returns EXIT_SUCCESS, or
 * EXIT_FAILURE when len is not positive or the file cannot be opened.
 */
static int zzcat_random_stream(char const *name, unsigned char *data,
                               int64_t len)
{
    FILE *stream;
    int i, j;

    /* Offsets are drawn modulo len, so len must be positive */
    if(len <= 0)
        return EXIT_FAILURE;

    stream = fopen(name, "r");
    if(!stream)
        return EXIT_FAILURE;

    for(i = 0; i < 128; i++)
    {
        long int now;

        fseek(stream, myrand() % len, SEEK_SET);
        for(j = 0; j < 4; j++)
        {
            /* The random value is always drawn, even if nothing is read */
            unsigned int r = myrand();
            size_t got;

            now = ftell(stream);
            if(now < 0 || now >= len)
                continue;
            /* The size is strictly below the room left after now */
            got = fread(data + now, r % (len - now), 1, stream);
            (void)got;
        }

        fseek(stream, myrand() % len, SEEK_SET);
        now = ftell(stream);
        for(j = 0; j < 16; j++)
        {
            int c = getc(stream);
            if(c != EOF && now >= 0 && now + j < len)
                data[now + j] = c;
        }
        now = ftell(stream);
        for(j = 0; j < 16; j++)
        {
            int c = fgetc(stream);
            if(c != EOF && now >= 0 && now + j < len)
                data[now + j] = c;
        }
    }

    fclose(stream);
    return EXIT_SUCCESS;
}
470
471#ifdef HAVE_MMAP
/*
 * mmap() followed by random memory reads.
 *
 * Opens name and performs 128 rounds of: map a pseudo-random window of the
 * first len bytes of the file (the window starts on a page boundary and
 * ends at or before len), copy 128 pseudo-randomly chosen bytes of the
 * window to the matching offsets of data, and unmap the window.
 *
 * data must be able to hold len bytes. Returns EXIT_SUCCESS, or
 * EXIT_FAILURE when len is not positive, the file cannot be opened or a
 * mapping fails; the descriptor is closed in every case.
 */
static int zzcat_random_mmap(char const *name, unsigned char *data,
                               int64_t len)
{
    int i, j, fd;

    /* Window sizes are drawn modulo len, so len must be positive */
    if(len <= 0)
        return EXIT_FAILURE;

    fd = open(name, O_RDONLY);
    if(fd < 0)
        return EXIT_FAILURE;

    for(i = 0; i < 128; i++)
    {
        char *map;
        /* Without getpagesize(), a page larger than len forces offset 0 */
        int64_t moff, mlen, pgsz = len + 1;
#ifdef HAVE_GETPAGESIZE
        pgsz = getpagesize();
#endif
        moff = len < pgsz ? 0 : (myrand() % (len / pgsz)) * pgsz;
        mlen = 1 + (myrand() % (len - moff));
        map = mmap(NULL, (size_t)mlen, PROT_READ, MAP_PRIVATE, fd,
                   (off_t)moff);
        if(map == MAP_FAILED)
        {
            close(fd);
            return EXIT_FAILURE;
        }
        for(j = 0; j < 128; j++)
        {
            int64_t x = myrand() % mlen;
            data[moff + x] = map[x];
        }
        munmap(map, (size_t)mlen);
    }

    close(fd);
    return EXIT_SUCCESS;
}
501#endif
502#endif
503
Note: See TracBrowser for help on using the repository browser.