Changeset 3526


Ignore:
Timestamp:
May 27, 2009 7:04:10 AM (5 years ago)
Author:
sam
Message:

Add support for multiple charsets to img2twit, and autodetect the charset
when decoding.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • libpipi/trunk/examples/img2twit.cpp

    r3525 r3526  
    2727 
    2828/* 
    29  * User-definable settings. 
     29 * Format-dependent settings. Change this and you risk making all other 
     30 * generated strings unusable. 
    3031 */ 
    3132 
    32 /* The Unicode characters at disposal - XXX: must be _ordered_ */ 
    33 static const uint32_t unichars[] = 
    34 { 
    35     /* Printable ASCII (except space) */ 
    36     //0x0021, 0x007f, 
    37  
    38     /* Stupid symbols and Dingbats shit */ 
    39     //0x25a0, 0x2600, /* Geometric Shapes */ 
    40     //0x2600, 0x269e, 0x26a0, 0x26bd, 0x26c0, 0x26c4, /* Misc. Symbols */ 
    41     //0x2701, 0x2705, 0x2706, 0x270a, 0x270c, 0x2728, 0x2729, 0x274c, 
    42     //  0x274d, 0x274e, 0x274f, 0x2753, 0x2756, 0x2757, 0x2758, 0x275f, 
    43     //  0x2761, 0x2795, 0x2798, 0x27b0, 0x27b1, 0x27bf, /* Dingbats */ 
    44  
    45     /* Chinese-looking stuff */ 
    46     //0x2e80, 0x2e9a, 0x2e9b, 0x2ef4, /* CJK Radicals Supplement */ 
    47     //0x2f00, 0x2fd6, /* Kangxi Radicals */ 
    48     //0x3400, 0x4db6, /* CJK Unified Ideographs Extension A */ 
    49     0x4e00, 0x9fa6, /* CJK Unified Ideographs */ 
    50  
    51     /* Korean - most people don't know the difference anyway */ 
    52     //0xac00, 0xd7a4, /* Hangul Syllables */ 
    53  
    54     /* More Chinese */ 
    55     //0xf900, 0xfa2e, 0xfa30, 0xfa6b, 0xfa70, 0xfada, /* CJK Compat. Idgphs. */ 
    56  
    57     /* TODO: there's also the U+20000 and U+2f800 planes, but they're 
    58      * not supported by the Twitter Javascript filter (yet?). */ 
    59  
    60     /* End of list marker - XXX: don't remove! */ 
    61     0x0000, 0x0000 
    62 }; 
     33/* Printable ASCII (except space) */ 
     34#define RANGE_ASCII 0x0021, 0x007f 
     35 
     36/* CJK Unified Ideographs */ 
     37#define RANGE_CJK 0x4e00, 0x9fa6 
     38//0x2e80, 0x2e9a, 0x2e9b, 0x2ef4, /* CJK Radicals Supplement */ 
     39//0x2f00, 0x2fd6, /* Kangxi Radicals */ 
     40//0x3400, 0x4db6, /* CJK Unified Ideographs Extension A */ 
     41//0xac00, 0xd7a4, /* Hangul Syllables -- Korean, not Chinese */ 
     42//0xf900, 0xfa2e, 0xfa30, 0xfa6b, 0xfa70, 0xfada, /* CJK Compat. Idgphs. */ 
     43/* TODO: there's also the U+20000 and U+2f800 planes, but they're 
     44 * not supported by the Twitter Javascript filter (yet?). */ 
     45 
     46/* Stupid symbols and Dingbats shit */ 
     47#define RANGE_SYMBOLS 0x25a0, 0x2600, /* Geometric Shapes */ \ 
     48  0x2600, 0x269e, 0x26a0, 0x26bd, 0x26c0, 0x26c4, /* Misc. Symbols */ \ 
     49  0x2701, 0x2705, 0x2706, 0x270a, 0x270c, 0x2728, 0x2729, 0x274c, \ 
     50    0x274d, 0x274e, 0x274f, 0x2753, 0x2756, 0x2757, 0x2758, 0x275f, \ 
     51    0x2761, 0x2795, 0x2798, 0x27b0, 0x27b1, 0x27bf /* Dingbats */ 
     52 
     53/* End of list marker */ 
     54#define RANGE_END 0x0, 0x0 
     55 
     56/* Pre-defined character ranges XXX: must be _ordered_ */ 
     57static const uint32_t unichars_ascii[] = { RANGE_ASCII, RANGE_END }; 
     58static const uint32_t unichars_cjk[] = { RANGE_CJK, RANGE_END }; 
     59static const uint32_t unichars_symbols[] = { RANGE_SYMBOLS, RANGE_END }; 
     60 
     61/* The Unicode characters at disposal */ 
     62static const uint32_t *unichars; 
    6363 
    6464/* The maximum image size we want to support */ 
     
    795795int main(int argc, char *argv[]) 
    796796{ 
     797    uint32_t unicode_data[4096]; /* FIXME: allocate this dynamically */ 
    797798    int opstats[2 * NB_OPS]; 
    798799    char const *srcname = NULL, *dstname = NULL; 
    799800    pipi_image_t *src, *tmp, *dst; 
    800801    double error = 1.0; 
    801     int width, height, ret = 0; 
     802    int width, height; 
    802803 
    803804    /* Parse command-line options */ 
     
    809810            { "output",      1, NULL, 'o' }, 
    810811            { "length",      1, NULL, 'l' }, 
     812            { "charset",     1, NULL, 'c' }, 
    811813            { "quality",     1, NULL, 'q' }, 
    812814            { "debug",       0, NULL, 'd' }, 
     
    814816            { NULL,          0, NULL, 0   }, 
    815817        }; 
    816         int c = mygetopt(argc, argv, "o:l:q:dh", long_options, &option_index); 
     818        int c = mygetopt(argc, argv, "o:l:c:q:dh", long_options, &option_index); 
    817819 
    818820        if(c == -1) 
     
    830832                fprintf(stderr, "Warning: rounding minimum message length to 16\n"); 
    831833                MAX_MSG_LEN = 16; 
     834            } 
     835            break; 
     836        case 'c': 
     837            if(!strcmp(myoptarg, "ascii")) 
     838                unichars = unichars_ascii; 
     839            else if(!strcmp(myoptarg, "cjk")) 
     840                unichars = unichars_cjk; 
     841            else if(!strcmp(myoptarg, "symbols")) 
     842                unichars = unichars_symbols; 
     843            else 
     844            { 
     845                fprintf(stderr, "Error: invalid char block \"%s\".", myoptarg); 
     846                fprintf(stderr, "Valid sets are: ascii, cjk, symbols\n"); 
     847                return EXIT_FAILURE; 
    832848            } 
    833849            break; 
     
    850866            printf("  -o, --output <filename>   output resulting image to filename\n"); 
    851867            printf("  -l, --length <size>       message length in characters (default 140)\n"); 
     868            printf("  -c, --charset <block>     character set to use (ascii, [cjk], symbols)\n"); 
    852869            printf("  -q, --quality <rate>      set image quality (0 - 10) (default 5)\n"); 
    853870            printf("  -d, --debug               print debug information\n"); 
     
    879896    if(myoptind == argc - 1) 
    880897        srcname = argv[myoptind]; 
     898 
     899    /* Decoding mode: read UTF-8 text from stdin */ 
     900    if(dstname) 
     901        for(int i = 0; i < MAX_MSG_LEN; i++) 
     902            unicode_data[i] = fread_utf8(stdin); 
     903 
     904    /* Autodetect charset if decoding, otherwise switch to CJK. */ 
     905    if(!unichars) 
     906    { 
     907        if(dstname) 
     908        { 
     909            if(unicode_data[0] >= 0x0021 && unicode_data[0] < 0x007f) 
     910                unichars = unichars_ascii; 
     911            else if(unicode_data[0] >= 0x4e00 && unicode_data[0] < 0x9fa6) 
     912                unichars = unichars_cjk; 
     913            else if(unicode_data[0] >= 0x25a0 && unicode_data[0] < 0x27bf) 
     914                unichars = unichars_symbols; 
     915            else 
     916            { 
     917                fprintf(stderr, "Error: unable to detect charset\n"); 
     918                return EXIT_FAILURE; 
     919            } 
     920        } 
     921        else 
     922            unichars = unichars_cjk; 
     923    } 
    881924 
    882925    pipi_set_gamma(1.0); 
     
    904947    if(dstname) 
    905948    { 
    906         /* Decoding mode: read UTF-8 text from stdin, find each 
    907          * character's index in our character list, and push it to our 
    908          * wonderful custom bitstream. */ 
    909         uint32_t data[MAX_MSG_LEN]; 
    910         for(int i = 0; i < MAX_MSG_LEN; i++) 
    911             data[i] = uni2index(fread_utf8(stdin)); 
     949        /* Decoding mode: find each character's index in our character 
     950         * list, and push it to our wonderful custom bitstream. */ 
    912951        for(int i = MAX_MSG_LEN; i--; ) 
    913             b.push(data[i], NUM_CHARACTERS); 
     952            b.push(uni2index(unicode_data[i]), NUM_CHARACTERS); 
    914953 
    915954        /* Read width and height from bitstream */ 
     
    933972    } 
    934973 
    935     /* Compute best w/h ratio */ 
     974    /* Compute "best" w/h ratio */ 
    936975    dw = 1; dh = TOTAL_CELLS; 
    937976    for(unsigned int i = 1; i <= TOTAL_CELLS; i++) 
     
    11611200    } 
    11621201 
    1163     return ret; 
    1164 } 
    1165  
     1202    return EXIT_SUCCESS; 
     1203} 
     1204 
Note: See TracChangeset for help on using the changeset viewer.