Ignore:
Timestamp:
May 25, 2006, 10:01:10 PM (15 years ago)
Author:
Sam Hocevar
Message:
  • Have cucul_utf8_to_utf32 write how many bytes it read.
  • Use that in the SLang driver so that it supports UTF-8 input.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcaca/trunk/cucul/charset.c

    r963 r982  
    96 96 *
    97 97 *  This function converts a UTF-8 character read from a string and returns
    98  *  its value in the UTF-32 character set.
     98 *  its value in the UTF-32 character set. If the second argument is not null,
     99 *  the total number of read bytes is written in it.
     100 *
     101 *  If a null byte was reached before the expected end of the UTF-8 sequence,
     102 *  this function returns zero and the number of read bytes is set to zero.
    99 103 *
    100 104 *  This function never fails, but its behaviour with illegal UTF-8 sequences
     …
    102 106 *
    103 107 *  \param s A string containing the UTF-8 character.
    104  *  \return The corresponding UTF-32 character.
    105  */
    106 unsigned long int cucul_utf8_to_utf32(char const *s)
    107 {
    108     int bytes = trailing[(int)(unsigned char)*s];
     108 *  \param read A pointer to an unsigned integer to store the number of
     109 *  bytes in the character, or NULL.
     110 *  \return The corresponding UTF-32 character, or zero if the character
     111 *  is incomplete.
     112 */
     113 unsigned long int cucul_utf8_to_utf32(char const *s, unsigned int *read)
     114 {
     115    unsigned int bytes = trailing[(int)(unsigned char)*s];
     116    unsigned int i = 0;
    109 117    uint32_t ret = 0;
    110 118
    111     switch(bytes)
    112     {
    113         /* FIXME: do something for invalid sequences (4 and 5) */
    114         case 3: ret += (uint8_t)*s++; ret <<= 6;
    115         case 2: ret += (uint8_t)*s++; ret <<= 6;
    116         case 1: ret += (uint8_t)*s++; ret <<= 6;
    117         case 0: ret += (uint8_t)*s++;
    118     }
    119 
    120     ret -= offsets[bytes];
    121 
    122     return ret;
     119    for(;;)
     120    {
     121        if(!*s)
     122        {
     123            if(read)
     124                *read = 0;
     125            return 0;
     126        }
     127
     128        ret += ((uint32_t)(unsigned char)*s++) << (6 * (bytes - i));
     129
     130        if(bytes == i++)
     131        {
     132            if(read)
     133                *read = i;
     134            return ret - offsets[bytes];
     135        }
     136    }
    123 137 }
    124 138
Note: See TracChangeset for help on using the changeset viewer.